Dokumen ini mendokumentasikan analisis kepuasan kerja karyawan menggunakan dua metode statistik:
JobSatisfaction berskala ordinal
(1–4).Dataset yang digunakan adalah IBM HR Employee Attrition
(WA_Fn-UseC_-HR-Employee-Attrition.csv).
# Instalasi (jalankan sekali jika belum tersedia)
# install.packages(c("MASS", "car", "brant", "lmtest", "pscl",
# "generalhoslem", "MVN", "heplots", "caret"))
library(MASS) # polr() untuk Ordinal Logistic Regression, lda()
library(car) # vif() untuk uji multikolinearitas
library(brant) # Uji asumsi proportional odds
library(lmtest) # lrtest() untuk likelihood ratio test
library(pscl) # pR2() untuk pseudo R-square
library(generalhoslem) # logitgof() untuk goodness-of-fit
library(MVN) # mvn() untuk uji normalitas multivariat
library(heplots) # boxM() untuk uji homogenitas varians
library(caret) # confusionMatrix() untuk evaluasi modelVariabel target JobSatisfaction diubah menjadi faktor
ordinal. Variabel yang tidak informatif secara analitik dihapus.
# Kategorik Nominal
data$BusinessTravel <- factor(data$BusinessTravel)
data$Department <- factor(data$Department)
data$EducationField <- factor(data$EducationField)
data$Gender <- factor(data$Gender)
data$JobRole <- factor(data$JobRole)
data$MaritalStatus <- factor(data$MaritalStatus)
data$OverTime <- factor(data$OverTime)
# Kategorik Ordinal
data$Education <- factor(data$Education, ordered = TRUE)
data$EnvironmentSatisfaction <- factor(data$EnvironmentSatisfaction, ordered = TRUE)
data$JobInvolvement <- factor(data$JobInvolvement, ordered = TRUE)
data$PerformanceRating <- factor(data$PerformanceRating, ordered = TRUE)
data$RelationshipSatisfaction<- factor(data$RelationshipSatisfaction,ordered = TRUE)
data$WorkLifeBalance <- factor(data$WorkLifeBalance, ordered = TRUE)
data$StockOptionLevel <- factor(data$StockOptionLevel, ordered = TRUE)## Warning in sqrt(diag(vc)): NaNs produced
## Call:
## polr(formula = JobSatisfaction ~ ., data = data, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Age 5.263e-03 0.0060649 0.8678
## BusinessTravelTravel_Frequently 9.222e-02 0.0019014 48.4994
## BusinessTravelTravel_Rarely -4.611e-02 0.0024093 -19.1374
## DailyRate 1.373e-04 0.0001192 1.1511
## DepartmentResearch & Development -2.528e-01 0.0131390 -19.2441
## DepartmentSales -8.941e-02 0.0134532 -6.6460
## DistanceFromHome -2.261e-03 0.0057824 -0.3910
## Education.L -7.841e-02 0.0006347 -123.5348
## Education.Q 5.644e-02 0.0004827 116.9247
## Education.C -2.419e-02 0.0013178 -18.3531
## Education^4 -1.648e-01 0.0011795 -139.7301
## EducationFieldLife Sciences 9.224e-02 0.0019875 46.4123
## EducationFieldMarketing -2.231e-01 0.0061177 -36.4731
## EducationFieldMedical -7.710e-02 0.0040546 -19.0159
## EducationFieldOther -2.348e-02 0.0001104 -212.6867
## EducationFieldTechnical Degree -1.151e-01 0.0008116 -141.8062
## EnvironmentSatisfaction.L -3.243e-02 0.0009777 -33.1715
## EnvironmentSatisfaction.Q 1.868e-02 0.0005156 36.2310
## EnvironmentSatisfaction.C -1.043e-02 0.0003934 -26.5209
## GenderMale 1.187e-01 0.0012238 97.0037
## HourlyRate -6.615e-03 0.0021466 -3.0816
## JobInvolvement.L -2.477e-01 0.0004656 -531.9502
## JobInvolvement.Q 1.319e-01 0.0013523 97.5078
## JobInvolvement.C -1.657e-01 0.0020173 -82.1335
## JobLevel 7.087e-02 0.0392628 1.8050
## JobRoleHuman Resources -5.769e-01 0.0004324 -1333.9957
## JobRoleLaboratory Technician -1.787e-01 0.0080497 -22.2054
## JobRoleManager -1.092e-01 0.0047673 -22.9023
## JobRoleManufacturing Director -2.058e-01 0.0005148 -399.7477
## JobRoleResearch Director -9.804e-02 0.0035305 -27.7692
## JobRoleResearch Scientist -4.694e-02 0.0101832 -4.6094
## JobRoleSales Executive -1.620e-01 0.0110074 -14.7155
## JobRoleSales Representative -3.446e-01 0.0006378 -540.3401
## MaritalStatusMarried 1.395e-01 0.0029885 46.6663
## MaritalStatusSingle 4.480e-01 0.0029297 152.9135
## MonthlyIncome -1.134e-05 NaN NaN
## MonthlyRate -1.287e-06 NaN NaN
## NumCompaniesWorked -3.952e-02 0.0207622 -1.9033
## OverTimeYes 9.906e-02 0.0012007 82.4979
## PercentSalaryHike 2.027e-02 0.0108419 1.8697
## PerformanceRating.L -9.721e-02 0.0038262 -25.4057
## RelationshipSatisfaction.L -2.276e-02 0.0007472 -30.4621
## RelationshipSatisfaction.Q -1.186e-01 0.0007622 -155.6501
## RelationshipSatisfaction.C -4.364e-02 0.0014957 -29.1770
## StockOptionLevel.L 2.211e-01 0.0010260 215.5053
## StockOptionLevel.Q -2.229e-01 0.0028972 -76.9357
## StockOptionLevel.C -9.824e-02 0.0015275 -64.3183
## TotalWorkingYears -8.167e-03 0.0109721 -0.7443
## TrainingTimesLastYear -1.169e-02 0.0344715 -0.3391
## WorkLifeBalance.L 4.069e-02 0.0020411 19.9335
## WorkLifeBalance.Q -9.011e-02 0.0007101 -126.8957
## WorkLifeBalance.C 2.008e-01 0.0021600 92.9790
## YearsAtCompany 7.456e-03 0.0164830 0.4523
## YearsInCurrentRole 1.352e-02 0.0212338 0.6367
## YearsSinceLastPromotion -1.043e-02 0.0188232 -0.5541
## YearsWithCurrManager -2.987e-02 0.0216235 -1.3813
##
## Intercepts:
## Value Std. Error t value
## 1|2 -1.6786 0.0013 -1317.3341
## 2|3 -0.7071 0.0090 -78.2485
## 3|4 0.5798 0.0608 9.5405
##
## Residual Deviance: 3947.718
## AIC: 4065.718
MonthlyIncome dan MonthlyRate dihapus
karena menyebabkan masalah konvergensi atau multikolinearitas tinggi.
Model full di-fit ulang.
data$MonthlyIncome <- NULL
data$MonthlyRate <- NULL
model_full <- polr(JobSatisfaction ~ ., data = data, Hess = TRUE)
summary(model_full)## Call:
## polr(formula = JobSatisfaction ~ ., data = data, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Age 0.0052737 0.0071224 0.74044
## BusinessTravelTravel_Frequently 0.0902966 0.1780827 0.50705
## BusinessTravelTravel_Rarely -0.0478348 0.1513893 -0.31597
## DailyRate 0.0001368 0.0001242 1.10136
## DepartmentResearch & Development -0.2602739 0.1436280 -1.81214
## DepartmentSales -0.0984579 0.1112090 -0.88534
## DistanceFromHome -0.0022572 0.0059055 -0.38223
## Education.L -0.0749148 0.1982901 -0.37780
## Education.Q 0.0549309 0.1717262 0.31988
## Education.C -0.0208917 0.1321653 -0.15807
## Education^4 -0.1660937 0.0957761 -1.73419
## EducationFieldLife Sciences 0.0948581 0.0888275 1.06789
## EducationFieldMarketing -0.2201305 0.1514031 -1.45394
## EducationFieldMedical -0.0738097 0.0972074 -0.75930
## EducationFieldOther -0.0177442 0.1721337 -0.10308
## EducationFieldTechnical Degree -0.1141035 0.1401003 -0.81444
## EnvironmentSatisfaction.L -0.0330745 0.0983158 -0.33641
## EnvironmentSatisfaction.Q 0.0183162 0.0978986 0.18709
## EnvironmentSatisfaction.C -0.0087654 0.0981019 -0.08935
## GenderMale 0.1179846 0.0973632 1.21180
## HourlyRate -0.0066128 0.0023000 -2.87515
## JobInvolvement.L -0.2456794 0.1727007 -1.42257
## JobInvolvement.Q 0.1304647 0.1389558 0.93889
## JobInvolvement.C -0.1670473 0.0945544 -1.76668
## JobLevel 0.0401665 0.1003285 0.40035
## JobRoleHuman Resources -0.5781947 0.1729922 -3.34232
## JobRoleLaboratory Technician -0.1710336 0.1997061 -0.85643
## JobRoleManager -0.1588492 0.2427293 -0.65443
## JobRoleManufacturing Director -0.2054188 0.2053873 -1.00015
## JobRoleResearch Director -0.1450202 0.2789034 -0.51997
## JobRoleResearch Scientist -0.0393009 0.1977414 -0.19875
## JobRoleSales Executive -0.1602660 0.1175650 -1.36321
## JobRoleSales Representative -0.3325398 0.1683090 -1.97577
## MaritalStatusMarried 0.1395607 0.1295046 1.07765
## MaritalStatusSingle 0.4460725 0.2064324 2.16086
## NumCompaniesWorked -0.0396454 0.0212281 -1.86760
## OverTimeYes 0.0986206 0.1078040 0.91481
## PercentSalaryHike 0.0200843 0.0165657 1.21240
## PerformanceRating.L -0.0958147 0.1273201 -0.75255
## RelationshipSatisfaction.L -0.0230791 0.0991947 -0.23266
## RelationshipSatisfaction.Q -0.1187758 0.0985733 -1.20495
## RelationshipSatisfaction.C -0.0430519 0.0965889 -0.44572
## StockOptionLevel.L 0.2222431 0.1753292 1.26758
## StockOptionLevel.Q -0.2214464 0.1485537 -1.49068
## StockOptionLevel.C -0.0990507 0.1239086 -0.79939
## TotalWorkingYears -0.0087160 0.0131138 -0.66465
## TrainingTimesLastYear -0.0113888 0.0370296 -0.30756
## WorkLifeBalance.L 0.0413810 0.1712747 0.24161
## WorkLifeBalance.Q -0.0887576 0.1388004 -0.63946
## WorkLifeBalance.C 0.2005538 0.0968285 2.07123
## YearsAtCompany 0.0075386 0.0170075 0.44325
## YearsInCurrentRole 0.0134435 0.0218111 0.61636
## YearsSinceLastPromotion -0.0108846 0.0192064 -0.56672
## YearsWithCurrManager -0.0293408 0.0221110 -1.32698
##
## Intercepts:
## Value Std. Error t value
## 1|2 -1.6668 0.0758 -21.9950
## 2|3 -0.6955 0.0906 -7.6772
## 3|4 0.5915 0.1023 5.7835
##
## Residual Deviance: 3947.821
## AIC: 4061.821
Nilai VIF > 10 mengindikasikan multikolinearitas yang perlu ditangani.
## GVIF Df GVIF^(1/(2*Df))
## Age 2.111663 1 1.453156
## BusinessTravel 1.061390 2 1.015006
## DailyRate 1.040860 1 1.020225
## Department 111.599192 2 3.250239
## DistanceFromHome 1.031953 1 1.015851
## Education 1.229642 4 1.026177
## EducationField 2.883379 5 1.111707
## EnvironmentSatisfaction 1.096776 3 1.015515
## Gender 1.035313 1 1.017504
## HourlyRate 1.029178 1 1.014484
## JobInvolvement 1.094957 3 1.015234
## JobLevel 6.618791 1 2.572701
## JobRole 488.267794 8 1.472452
## MaritalStatus 3.232352 2 1.340848
## NumCompaniesWorked 1.285470 1 1.133786
## OverTime 1.040900 1 1.020245
## PercentSalaryHike 2.598259 1 1.611912
## PerformanceRating 2.565574 1 1.601741
## RelationshipSatisfaction 1.113226 3 1.018038
## StockOptionLevel 3.346402 3 1.223009
## TotalWorkingYears 4.934254 1 2.221318
## TrainingTimesLastYear 1.041544 1 1.020561
## WorkLifeBalance 1.103110 3 1.016490
## YearsAtCompany 4.791056 1 2.188848
## YearsInCurrentRole 2.812035 1 1.676912
## YearsSinceLastPromotion 1.725325 1 1.313516
## YearsWithCurrManager 2.844163 1 1.686465
Dua pendekatan digunakan: standardized residuals dan Cook’s Distance. Observasi dengan |residual| > 3 atau Cook’s D melewati threshold dianggap outlier.
# Standardized residuals
res <- rstandard(model_lm)
plot(res, main = "Standardized Residuals", ylab = "Residual", xlab = "Index")
abline(h = c(-3, 3), col = "red", lty = 2)# Cook's Distance
cooks <- cooks.distance(model_lm)
plot(cooks, type = "h", main = "Cook's Distance", ylab = "Cook's D", xlab = "Index")
abline(h = 4 / length(cooks), col = "red", lty = 2)Untuk setiap variabel numerik, ditambahkan transformasi log sebagai prediktor. Jika koefisien log signifikan (p < 0.05), variabel dinyatakan tidak linear.
num_vars <- setdiff(names(data)[sapply(data, is.numeric)], "JobSatisfaction")
hasil_linearitas <- data.frame(
Variabel = character(),
p_value_log = numeric(),
Keputusan = character(),
Keterangan = character()
)
for (v in num_vars) {
data[[paste0(v, "_log")]] <- log(data[[v]] + 1)
formula_lin <- as.formula(paste("JobSatisfaction ~", v, "+", paste0(v, "_log")))
model_lin <- polr(formula_lin, data = data, Hess = TRUE)
coef_table <- coef(summary(model_lin))
if (!(paste0(v, "_log") %in% rownames(coef_table))) next
p_val <- pnorm(abs(coef_table[paste0(v, "_log"), "t value"]), lower.tail = FALSE) * 2
hasil_linearitas <- rbind(hasil_linearitas, data.frame(
Variabel = v,
p_value_log = p_val,
Keputusan = ifelse(p_val < 0.05, "Tolak H0", "Gagal Tolak H0"),
Keterangan = ifelse(p_val < 0.05, "Tidak Linear", "Linear")
))
}
# Bersihkan kolom _log sementara dari data
data <- data[, !grepl("_log$", names(data))]
hasil_linearitas## Variabel p_value_log Keputusan Keterangan
## 1 Age 9.723579e-01 Gagal Tolak H0 Linear
## 2 DailyRate 6.713550e-78 Tolak H0 Tidak Linear
## 3 DistanceFromHome 2.910307e-01 Gagal Tolak H0 Linear
## 4 HourlyRate 9.639715e-01 Gagal Tolak H0 Linear
## 5 JobLevel 7.153624e-01 Gagal Tolak H0 Linear
## 6 NumCompaniesWorked 7.373427e-01 Gagal Tolak H0 Linear
## 7 PercentSalaryHike 9.444475e-01 Gagal Tolak H0 Linear
## 8 TotalWorkingYears 8.893249e-01 Gagal Tolak H0 Linear
## 9 TrainingTimesLastYear 8.670123e-02 Gagal Tolak H0 Linear
## 10 YearsAtCompany 3.147814e-01 Gagal Tolak H0 Linear
## 11 YearsInCurrentRole 7.690251e-01 Gagal Tolak H0 Linear
## 12 YearsSinceLastPromotion 1.467784e-01 Gagal Tolak H0 Linear
## 13 YearsWithCurrManager 3.352051e-01 Gagal Tolak H0 Linear
DailyRate tidak memenuhi asumsi linearitas, sehingga
ditransformasi dengan log.
num_vars <- setdiff(names(data)[sapply(data, is.numeric)], "JobSatisfaction")
hasil_linearitas <- data.frame(
Variabel = character(),
p_value_log = numeric(),
Keputusan = character(),
Keterangan = character()
)
for (v in num_vars) {
data[[paste0(v, "_log")]] <- log(data[[v]] + 1)
formula_lin <- as.formula(paste("JobSatisfaction ~", v, "+", paste0(v, "_log")))
model_lin <- polr(formula_lin, data = data, Hess = TRUE)
coef_table <- coef(summary(model_lin))
if (!(paste0(v, "_log") %in% rownames(coef_table))) next
p_val <- pnorm(abs(coef_table[paste0(v, "_log"), "t value"]), lower.tail = FALSE) * 2
hasil_linearitas <- rbind(hasil_linearitas, data.frame(
Variabel = v,
p_value_log = p_val,
Keputusan = ifelse(p_val < 0.05, "Tolak H0", "Gagal Tolak H0"),
Keterangan = ifelse(p_val < 0.05, "Tidak Linear", "Linear")
))
}
data <- data[, !grepl("_log$", names(data))]
hasil_linearitas## Variabel p_value_log Keputusan Keterangan
## 1 Age 0.97235791 Gagal Tolak H0 Linear
## 2 DistanceFromHome 0.29103073 Gagal Tolak H0 Linear
## 3 HourlyRate 0.96397151 Gagal Tolak H0 Linear
## 4 JobLevel 0.71536236 Gagal Tolak H0 Linear
## 5 NumCompaniesWorked 0.73734272 Gagal Tolak H0 Linear
## 6 PercentSalaryHike 0.94444754 Gagal Tolak H0 Linear
## 7 TotalWorkingYears 0.88932492 Gagal Tolak H0 Linear
## 8 TrainingTimesLastYear 0.08670123 Gagal Tolak H0 Linear
## 9 YearsAtCompany 0.31478139 Gagal Tolak H0 Linear
## 10 YearsInCurrentRole 0.76902512 Gagal Tolak H0 Linear
## 11 YearsSinceLastPromotion 0.14677839 Gagal Tolak H0 Linear
## 12 YearsWithCurrManager 0.33520514 Gagal Tolak H0 Linear
## 13 DailyRate_trans 0.28357841 Gagal Tolak H0 Linear
H0: variabel kategorik independen terhadap
JobSatisfaction. Semua variabel tetap dipertahankan dalam
model meskipun gagal tolak H0.
vars_cat <- setdiff(names(data)[sapply(data, is.factor)], "JobSatisfaction")
hasil_independensi <- data.frame(
Variabel = character(),
ChiSquare = numeric(),
df = numeric(),
p_value = numeric(),
Keputusan = character()
)
for (v in vars_cat) {
tbl <- table(data[[v]], data$JobSatisfaction)
if (nrow(tbl) > 1 && ncol(tbl) > 1) {
test <- chisq.test(tbl)
hasil_independensi <- rbind(hasil_independensi, data.frame(
Variabel = v,
ChiSquare = round(test$statistic, 3),
df = test$parameter,
p_value = round(test$p.value, 5),
Keputusan = ifelse(test$p.value < 0.05, "Tolak H0", "Gagal Tolak H0")
))
}
}
hasil_independensi## Variabel ChiSquare df p_value Keputusan
## X-squared BusinessTravel 5.220 6 0.51595 Gagal Tolak H0
## X-squared1 Department 8.528 6 0.20190 Gagal Tolak H0
## X-squared2 Education 13.030 12 0.36689 Gagal Tolak H0
## X-squared3 EducationField 16.284 15 0.36345 Gagal Tolak H0
## X-squared4 EnvironmentSatisfaction 5.319 9 0.80564 Gagal Tolak H0
## X-squared5 Gender 2.548 3 0.46674 Gagal Tolak H0
## X-squared6 JobInvolvement 7.421 9 0.59333 Gagal Tolak H0
## X-squared7 JobRole 18.400 24 0.78320 Gagal Tolak H0
## X-squared8 MaritalStatus 2.842 6 0.82845 Gagal Tolak H0
## X-squared9 OverTime 3.688 3 0.29717 Gagal Tolak H0
## X-squared10 PerformanceRating 4.014 3 0.25992 Gagal Tolak H0
## X-squared11 RelationshipSatisfaction 2.963 9 0.96574 Gagal Tolak H0
## X-squared12 StockOptionLevel 2.759 9 0.97308 Gagal Tolak H0
## X-squared13 WorkLifeBalance 6.577 9 0.68112 Gagal Tolak H0
## Likelihood ratio test
##
## Model 1: JobSatisfaction ~ Age + BusinessTravel + Department + DistanceFromHome +
## Education + EducationField + EnvironmentSatisfaction + Gender +
## HourlyRate + JobInvolvement + JobLevel + JobRole + MaritalStatus +
## NumCompaniesWorked + OverTime + PercentSalaryHike + PerformanceRating +
## RelationshipSatisfaction + StockOptionLevel + TotalWorkingYears +
## TrainingTimesLastYear + WorkLifeBalance + YearsAtCompany +
## YearsInCurrentRole + YearsSinceLastPromotion + YearsWithCurrManager +
## DailyRate_trans
## Model 2: JobSatisfaction ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 57 -1973.3
## 2 3 -1999.8 -54 52.942 0.5152
coef_table <- coef(summary(model_full))
p_values <- pnorm(abs(coef_table[, "t value"]), lower.tail = FALSE) * 2
cbind(coef_table, p_values)## Value Std. Error t value
## Age 0.005500533 0.007489232 0.73445891
## BusinessTravelTravel_Frequently 0.087420328 0.183852314 0.47549213
## BusinessTravelTravel_Rarely -0.047958132 0.157385035 -0.30471850
## DepartmentResearch & Development -0.267442960 0.641250346 -0.41706482
## DepartmentSales -0.099911964 0.659485313 -0.15149991
## DistanceFromHome -0.002212497 0.005928517 -0.37319575
## Education.L -0.074115448 0.200498328 -0.36965619
## Education.Q 0.054984975 0.172828056 0.31814843
## Education.C -0.020812445 0.132772295 -0.15675292
## Education^4 -0.161945923 0.096221856 -1.68304716
## EducationFieldLife Sciences 0.077639265 0.460051355 0.16876217
## EducationFieldMarketing -0.235993142 0.492391945 -0.47927905
## EducationFieldMedical -0.092046924 0.461838533 -0.19930542
## EducationFieldOther -0.033073284 0.497512801 -0.06647725
## EducationFieldTechnical Degree -0.131999894 0.479517555 -0.27527646
## EnvironmentSatisfaction.L -0.033148574 0.098425209 -0.33678947
## EnvironmentSatisfaction.Q 0.017205185 0.097959443 0.17563579
## EnvironmentSatisfaction.C -0.007199158 0.098202992 -0.07330895
## GenderMale 0.115145757 0.097908341 1.17605667
## HourlyRate -0.006686138 0.002380336 -2.80890565
## JobInvolvement.L -0.248951509 0.172934737 -1.43956913
## JobInvolvement.Q 0.133426372 0.139784939 0.95451179
## JobInvolvement.C -0.168720669 0.094697468 -1.78168089
## JobLevel 0.040404517 0.109503388 0.36897961
## JobRoleHuman Resources -0.585575352 0.684985019 -0.85487322
## JobRoleLaboratory Technician -0.165131745 0.225802192 -0.73131153
## JobRoleManager -0.159194905 0.342935331 -0.46421261
## JobRoleManufacturing Director -0.206526659 0.219397043 -0.94133748
## JobRoleResearch Director -0.146117354 0.293117554 -0.49849404
## JobRoleResearch Scientist -0.036620427 0.223754202 -0.16366364
## JobRoleSales Executive -0.163719357 0.438164783 -0.37364791
## JobRoleSales Representative -0.339782184 0.483609808 -0.70259573
## MaritalStatusMarried 0.139866130 0.130463935 1.07206738
## MaritalStatusSingle 0.452515272 0.207525660 2.18052684
## NumCompaniesWorked -0.040079582 0.021296459 -1.88198341
## OverTimeYes 0.098512172 0.107996203 0.91218181
## PercentSalaryHike 0.019646001 0.020636832 0.95198726
## PerformanceRating.L -0.092874506 0.148171261 -0.62680512
## RelationshipSatisfaction.L -0.024375577 0.099357308 -0.24533250
## RelationshipSatisfaction.Q -0.117787386 0.098715904 -1.19319563
## RelationshipSatisfaction.C -0.042527015 0.096700256 -0.43978182
## StockOptionLevel.L 0.219759070 0.175697720 1.25077929
## StockOptionLevel.Q -0.227285128 0.148829178 -1.52715436
## StockOptionLevel.C -0.101148753 0.124114290 -0.81496460
## TotalWorkingYears -0.008894309 0.013519630 -0.65788109
## TrainingTimesLastYear -0.011557775 0.037435493 -0.30873843
## WorkLifeBalance.L 0.044936122 0.171737065 0.26165652
## WorkLifeBalance.Q -0.085851255 0.139068282 -0.61733167
## WorkLifeBalance.C 0.198398354 0.097038982 2.04452221
## YearsAtCompany 0.007777030 0.017055735 0.45597741
## YearsInCurrentRole 0.012569035 0.021912175 0.57360965
## YearsSinceLastPromotion -0.010692298 0.019244121 -0.55561372
## YearsWithCurrManager -0.028874407 0.022164079 -1.30275693
## DailyRate_trans 0.115404293 0.073244189 1.57561022
## 1|2 -1.055893567 0.970929802 -1.08750763
## 2|3 -0.083795224 0.970601865 -0.08633326
## 3|4 1.204073230 0.971437172 1.23947617
## p_values
## Age 0.462669095
## BusinessTravelTravel_Frequently 0.634436241
## BusinessTravelTravel_Rarely 0.760580560
## DepartmentResearch & Development 0.676630996
## DepartmentSales 0.879581383
## DistanceFromHome 0.709002756
## Education.L 0.711638679
## Education.Q 0.750372350
## Education.C 0.875439578
## Education^4 0.092365962
## EducationFieldLife Sciences 0.865983716
## EducationFieldMarketing 0.631740126
## EducationFieldMedical 0.842023837
## EducationFieldOther 0.946997867
## EducationFieldTechnical Degree 0.783103850
## EnvironmentSatisfaction.L 0.736275611
## EnvironmentSatisfaction.Q 0.860580080
## EnvironmentSatisfaction.C 0.941560269
## GenderMale 0.239572231
## HourlyRate 0.004971021
## JobInvolvement.L 0.149989338
## JobInvolvement.Q 0.339824646
## JobInvolvement.C 0.074801285
## JobLevel 0.712142924
## JobRoleHuman Resources 0.392621336
## JobRoleLaboratory Technician 0.464588892
## JobRoleManager 0.642495427
## JobRoleManufacturing Director 0.346531942
## JobRoleResearch Director 0.618135868
## JobRoleResearch Scientist 0.869995944
## JobRoleSales Executive 0.708666282
## JobRoleSales Representative 0.482307727
## MaritalStatusMarried 0.283689767
## MaritalStatusSingle 0.029218432
## NumCompaniesWorked 0.059838268
## OverTimeYes 0.361673019
## PercentSalaryHike 0.341103441
## PerformanceRating.L 0.530786984
## RelationshipSatisfaction.L 0.806198986
## RelationshipSatisfaction.Q 0.232792764
## RelationshipSatisfaction.C 0.660095133
## StockOptionLevel.L 0.211015013
## StockOptionLevel.Q 0.126722632
## StockOptionLevel.C 0.415092572
## TotalWorkingYears 0.510614540
## TrainingTimesLastYear 0.757520510
## WorkLifeBalance.L 0.793586267
## WorkLifeBalance.Q 0.537015981
## WorkLifeBalance.C 0.040901985
## YearsAtCompany 0.648406224
## YearsInCurrentRole 0.566231977
## YearsSinceLastPromotion 0.578474953
## YearsWithCurrManager 0.192657758
## DailyRate_trans 0.115115660
## 1|2 0.276812529
## 2|3 0.931201499
## 3|4 0.215169207
Berdasarkan uji parsial, variabel yang signifikan (p < 0.05)
adalah: HourlyRate, MaritalStatusSingle, dan
WorkLifeBalance.C.
# Buat design matrix untuk mengekstrak kolom dummy yang relevan
X <- model.matrix(
~ Age + BusinessTravel + Department + DistanceFromHome + Education +
EducationField + EnvironmentSatisfaction + Gender + HourlyRate +
JobInvolvement + JobLevel + JobRole + MaritalStatus +
NumCompaniesWorked + OverTime + PercentSalaryHike + PerformanceRating +
RelationshipSatisfaction + StockOptionLevel + TotalWorkingYears +
TrainingTimesLastYear + WorkLifeBalance + YearsAtCompany +
YearsInCurrentRole + YearsSinceLastPromotion + YearsWithCurrManager +
DailyRate_trans,
data = data
)[, -1] # Hapus intercept
# Ambil kolom yang signifikan
X_sig <- X[, c("HourlyRate", "MaritalStatusSingle", "WorkLifeBalance.C")]
# Buat dataset baru
data_sig <- data.frame(JobSatisfaction = data$JobSatisfaction, X_sig)
# Fit model dengan ketiga variabel
model_selected <- polr(JobSatisfaction ~ ., data = data_sig, Hess = TRUE)
summary(model_selected)## Call:
## polr(formula = JobSatisfaction ~ ., data = data_sig, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## HourlyRate -0.006163 0.002332 -2.643
## MaritalStatusSingle 0.102073 0.101189 1.009
## WorkLifeBalance.C 0.182999 0.082352 2.222
##
## Intercepts:
## Value Std. Error t value
## 1|2 -1.8333 0.1743 -10.5151
## 2|3 -0.8796 0.1685 -5.2213
## 3|4 0.3781 0.1673 2.2605
##
## Residual Deviance: 3986.817
## AIC: 3998.817
# Uji serentak
model_null_sig <- polr(JobSatisfaction ~ 1, data = data_sig, Hess = TRUE)
lrtest(model_selected, model_null_sig)## Likelihood ratio test
##
## Model 1: JobSatisfaction ~ HourlyRate + MaritalStatusSingle + WorkLifeBalance.C
## Model 2: JobSatisfaction ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 6 -1993.4
## 2 3 -1999.8 -3 12.792 0.005109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Uji parsial
coef_table <- coef(summary(model_selected))
p_values <- pnorm(abs(coef_table[, "t value"]), lower.tail = FALSE) * 2
cbind(coef_table, p_values)## Value Std. Error t value p_values
## HourlyRate -0.006163417 0.002331775 -2.643230 8.211924e-03
## MaritalStatusSingle 0.102072796 0.101189343 1.008731 3.131038e-01
## WorkLifeBalance.C 0.182999413 0.082351705 2.222169 2.627188e-02
## 1|2 -1.833282713 0.174347972 -10.515079 7.361901e-26
## 2|3 -0.879565015 0.168457300 -5.221294 1.776776e-07
## 3|4 0.378079577 0.167252989 2.260525 2.378868e-02
MaritalStatusSingle tidak signifikan secara parsial pada
model terpilih, sehingga model final hanya menggunakan
HourlyRate dan WorkLifeBalance.C.
model_final <- polr(
JobSatisfaction ~ HourlyRate + WorkLifeBalance.C,
data = data_sig,
Hess = TRUE
)
summary(model_final)## Call:
## polr(formula = JobSatisfaction ~ HourlyRate + WorkLifeBalance.C,
## data = data_sig, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## HourlyRate -0.006242 0.00233 -2.679
## WorkLifeBalance.C 0.179466 0.08226 2.182
##
## Intercepts:
## Value Std. Error t value
## 1|2 -1.8693 0.1707 -10.9533
## 2|3 -0.9158 0.1646 -5.5652
## 3|4 0.3411 0.1631 2.0911
##
## Residual Deviance: 3987.836
## AIC: 3997.836
## Likelihood ratio test
##
## Model 1: JobSatisfaction ~ HourlyRate + WorkLifeBalance.C
## Model 2: JobSatisfaction ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -1993.9
## 2 3 -1999.8 -2 11.774 0.002776 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Uji parsial
coef_table <- coef(summary(model_final))
p_values <- pnorm(abs(coef_table[, "t value"]), lower.tail = FALSE) * 2
cbind(coef_table, p_values)## Value Std. Error t value p_values
## HourlyRate -0.006241525 0.002329667 -2.679149 7.380948e-03
## WorkLifeBalance.C 0.179465994 0.082259737 2.181699 2.913175e-02
## 1|2 -1.869310466 0.170661563 -10.953319 6.405691e-28
## 2|3 -0.915817302 0.164560329 -5.565237 2.617957e-08
## 3|4 0.341111269 0.163121750 2.091145 3.651505e-02
H0: model fit dengan data. p-value > 0.05 → model layak digunakan.
##
## Hosmer and Lemeshow test (multinomial model)
##
## data: data_sig$JobSatisfaction, fitted(model_final)
## X-squared = 15.994, df = 24, p-value = 0.8883
Data yang digunakan sama dengan data_sig hasil seleksi
OLR, namun JobSatisfaction diubah menjadi faktor
non-ordinal untuk keperluan LDA.
data_lda <- data_sig
data_lda$JobSatisfaction <- factor(data_lda$JobSatisfaction, ordered = FALSE)
str(data_lda)## 'data.frame': 1470 obs. of 4 variables:
## $ JobSatisfaction : Factor w/ 4 levels "1","2","3","4": 4 2 3 3 2 4 1 3 3 3 ...
## $ HourlyRate : num 94 61 92 56 40 79 81 67 44 94 ...
## $ MaritalStatusSingle: num 1 0 1 0 0 1 0 0 1 0 ...
## $ WorkLifeBalance.C : num -0.224 -0.671 -0.671 -0.671 -0.671 ...
H0: data prediktor mengikuti distribusi normal multivariat.
## NULL
H0: matriks varians-kovarians antar kelompok homogen.
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data_lda[, -1] by data_lda$JobSatisfaction
## Chi-Sq (approx.) = 9.9644, df = 18, p-value = 0.9331
## HourlyRate MaritalStatusSingle WorkLifeBalance.C
## 1 1.3826677 1.4581537 0.02755972
## 2 -0.2405949 -0.6853322 -0.75153240
## 3 1.2842882 1.4581537 -0.75153240
## 4 -0.4865438 -0.6853322 -0.75153240
## 5 -1.2735802 -0.6853322 -0.75153240
## 6 0.6448211 1.4581537 1.58574395
## Call:
## lda(y_lda ~ ., data = data.frame(y_lda, x_lda))
##
## Prior probabilities of groups:
## 1 2 3 4
## 0.1965986 0.1904762 0.3006803 0.3122449
##
## Group means:
## HourlyRate MaritalStatusSingle WorkLifeBalance.C
## 1 0.13505160 -0.02522757 -0.05601071
## 2 0.03644896 -0.01166523 -0.05591443
## 3 -0.04962282 -0.02579810 -0.01474392
## 4 -0.05948221 0.04784269 0.08357288
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3
## HourlyRate -0.7655056 0.6333520 -0.1326226
## MaritalStatusSingle 0.2584522 0.5322754 0.8066925
## WorkLifeBalance.C 0.5908487 0.5950845 -0.5468204
##
## Proportion of trace:
## LD1 LD2 LD3
## 0.8312 0.1616 0.0072
## [1] 4 3 4 3 3 4
## Levels: 1 2 3 4
## Actual
## Predicted 1 2 3 4
## 1 0 0 0 0
## 2 0 0 0 0
## 3 137 135 192 190
## 4 152 145 250 269
acc_lda <- mean(kelas_lda == y_lda)
error_lda <- 1 - acc_lda
cat("Akurasi :", round(acc_lda * 100, 2), "%\n")## Akurasi : 31.36 %
## Error Rate: 68.64 %
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 0 0 0 0
## 2 0 0 0 0
## 3 137 135 192 190
## 4 152 145 250 269
##
## Overall Statistics
##
## Accuracy : 0.3136
## 95% CI : (0.2899, 0.338)
## No Information Rate : 0.3122
## P-Value [Acc > NIR] : 0.465
##
## Kappa : 0.0094
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 0.0000 0.0000 0.4344 0.5861
## Specificity 1.0000 1.0000 0.5506 0.4590
## Pos Pred Value NaN NaN 0.2936 0.3297
## Neg Pred Value 0.8034 0.8095 0.6936 0.7095
## Prevalence 0.1966 0.1905 0.3007 0.3122
## Detection Rate 0.0000 0.0000 0.1306 0.1830
## Detection Prevalence 0.0000 0.0000 0.4449 0.5551
## Balanced Accuracy 0.5000 0.5000 0.4925 0.5225
| Metode | Variabel Signifikan | Akurasi |
|---|---|---|
| OLR | HourlyRate, WorkLifeBalance.C |
Lihat Pseudo R² & GoF |
| LDA | HourlyRate, WorkLifeBalance.C
(scaled) |
Lihat output eval_lda |