Load library

library(readr)

## Warning: package 'readr' was built under R version 4.5.3

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.5.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(MASS)

## Warning: package 'MASS' was built under R version 4.5.3

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(car)

## Warning: package 'car' was built under R version 4.5.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.5.3

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(pscl)

## Warning: package 'pscl' was built under R version 4.5.3

## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.

Deskripsi Data

# Import the employee attrition table from CSV.
# NOTE(review): this is an absolute path on a local D: drive, so the script
# only runs on a machine with the same folder layout.
csv_path <- "D:/Semester 4/ANMUL_MODUL 4/Employee Attrition.csv"
data <- read_csv(file = csv_path)

## Rows: 15787 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): dept, salary
## dbl (8): Emp ID, satisfaction_level, last_evaluation, number_project, averag...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first rows of the imported table.
head(data)

## # A tibble: 6 × 10
##   `Emp ID` satisfaction_level last_evaluation number_project
##      <dbl>              <dbl>           <dbl>          <dbl>
## 1        1               0.38            0.53              2
## 2        2               0.8             0.86              5
## 3        3               0.11            0.88              7
## 4        4               0.72            0.87              5
## 5        5               0.37            0.52              2
## 6        6               0.41            0.5               2
## # ℹ 6 more variables: average_montly_hours <dbl>, time_spend_company <dbl>,
## #   Work_accident <dbl>, promotion_last_5years <dbl>, dept <chr>, salary <chr>

# Show the storage type and leading values of every column.
str(data)

## spc_tbl_ [15,787 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Emp ID               : num [1:15787] 1 2 3 4 5 6 7 8 9 10 ...
##  $ satisfaction_level   : num [1:15787] 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
##  $ last_evaluation      : num [1:15787] 0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
##  $ number_project       : num [1:15787] 2 5 7 5 2 2 6 5 5 2 ...
##  $ average_montly_hours : num [1:15787] 157 262 272 223 159 153 247 259 224 142 ...
##  $ time_spend_company   : num [1:15787] 3 6 4 5 3 3 4 5 5 3 ...
##  $ Work_accident        : num [1:15787] 0 0 0 0 0 0 0 0 0 0 ...
##  $ promotion_last_5years: num [1:15787] 0 0 0 0 0 0 0 0 0 0 ...
##  $ dept                 : chr [1:15787] "sales" "sales" "sales" "sales" ...
##  $ salary               : chr [1:15787] "low" "medium" "medium" "low" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Emp ID` = col_double(),
##   ..   satisfaction_level = col_double(),
##   ..   last_evaluation = col_double(),
##   ..   number_project = col_double(),
##   ..   average_montly_hours = col_double(),
##   ..   time_spend_company = col_double(),
##   ..   Work_accident = col_double(),
##   ..   promotion_last_5years = col_double(),
##   ..   dept = col_character(),
##   ..   salary = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Per-column summary statistics of the raw import (NA counts are included).
summary(data)

##      Emp ID      satisfaction_level last_evaluation  number_project 
##  Min.   :    1   Min.   :0.0900     Min.   :0.3600   Min.   :2.000  
##  1st Qu.: 3750   1st Qu.:0.4400     1st Qu.:0.5600   1st Qu.:3.000  
##  Median : 7500   Median :0.6400     Median :0.7200   Median :4.000  
##  Mean   : 7500   Mean   :0.6128     Mean   :0.7161   Mean   :3.803  
##  3rd Qu.:11250   3rd Qu.:0.8200     3rd Qu.:0.8700   3rd Qu.:5.000  
##  Max.   :14999   Max.   :1.0000     Max.   :1.0000   Max.   :7.000  
##  NA's   :788     NA's   :788        NA's   :788      NA's   :788    
##  average_montly_hours time_spend_company Work_accident    promotion_last_5years
##  Min.   : 96.0        Min.   : 2.000     Min.   :0.0000   Min.   :0.00000      
##  1st Qu.:156.0        1st Qu.: 3.000     1st Qu.:0.0000   1st Qu.:0.00000      
##  Median :200.0        Median : 3.000     Median :0.0000   Median :0.00000      
##  Mean   :201.1        Mean   : 3.498     Mean   :0.1446   Mean   :0.02127      
##  3rd Qu.:245.0        3rd Qu.: 4.000     3rd Qu.:0.0000   3rd Qu.:0.00000      
##  Max.   :310.0        Max.   :10.000     Max.   :1.0000   Max.   :1.00000      
##  NA's   :788          NA's   :788        NA's   :788      NA's   :788          
##      dept              salary         
##  Length:15787       Length:15787      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
##

# Count the missing values in each column.
colSums(is.na(data))

##                Emp ID    satisfaction_level       last_evaluation 
##                   788                   788                   788 
##        number_project  average_montly_hours    time_spend_company 
##                   788                   788                   788 
##         Work_accident promotion_last_5years                  dept 
##                   788                   788                   788 
##                salary 
##                   788

Analisis Data

# Drop every row that contains at least one missing value.
data <- na.omit(data)

# Re-count missing values per column to confirm that none remain.
colSums(is.na(data))

##                Emp ID    satisfaction_level       last_evaluation 
##                     0                     0                     0 
##        number_project  average_montly_hours    time_spend_company 
##                     0                     0                     0 
##         Work_accident promotion_last_5years                  dept 
##                     0                     0                     0 
##                salary 
##                     0

Statistika Deskriptif

# Exploratory plots of the cleaned data. Titles stay in the report's language
# because they are drawn into the figures.

# Distributions of the numeric variables.
hist(
  data$satisfaction_level,
  main = "Distribusi Kepuasan",
  xlab = "Satisfaction"
)
hist(data$number_project, main = "Jumlah Proyek", xlab = "Project")
hist(data$average_montly_hours, main = "Jam Kerja", xlab = "Hours")

# Box plots of the same variables to look for outliers.
boxplot(data$satisfaction_level, main = "Outlier Satisfaction")
boxplot(data$average_montly_hours, main = "Outlier Jam Kerja")
boxplot(data$number_project, main = "Outlier Project")

# salary is still a character column at this point, so table() and boxplot()
# would sort its categories alphabetically (high, low, medium). Give the plots
# the natural low, medium, high order without modifying `data`.
salary_levels <- c("low", "medium", "high")
salary_for_plot <- factor(data$salary, levels = salary_levels)

# Frequencies of the categorical variables.
barplot(table(salary_for_plot), main = "Distribusi Gaji")
barplot(table(data$dept), main = "Distribusi Departemen")
barplot(table(data$promotion_last_5years), main = "Promosi")

# Satisfaction level split by salary, promotion and work accident.
boxplot(
  satisfaction_level ~ factor(salary, levels = salary_levels),
  data = data,
  xlab = "salary", # keep the plain column name as the axis label
  main = "Satisfaction vs Salary"
)
boxplot(
  satisfaction_level ~ promotion_last_5years,
  data = data,
  main = "Satisfaction vs Promotion"
)
boxplot(
  satisfaction_level ~ Work_accident,
  data = data,
  main = "Satisfaction vs Work Accident"
)

# Per-column summary statistics of `data` at this stage of the analysis.
summary(data)

##      Emp ID      satisfaction_level last_evaluation  number_project 
##  Min.   :    1   Min.   :0.0900     Min.   :0.3600   Min.   :2.000  
##  1st Qu.: 3750   1st Qu.:0.4400     1st Qu.:0.5600   1st Qu.:3.000  
##  Median : 7500   Median :0.6400     Median :0.7200   Median :4.000  
##  Mean   : 7500   Mean   :0.6128     Mean   :0.7161   Mean   :3.803  
##  3rd Qu.:11250   3rd Qu.:0.8200     3rd Qu.:0.8700   3rd Qu.:5.000  
##  Max.   :14999   Max.   :1.0000     Max.   :1.0000   Max.   :7.000  
##  average_montly_hours time_spend_company Work_accident    promotion_last_5years
##  Min.   : 96.0        Min.   : 2.000     Min.   :0.0000   Min.   :0.00000      
##  1st Qu.:156.0        1st Qu.: 3.000     1st Qu.:0.0000   1st Qu.:0.00000      
##  Median :200.0        Median : 3.000     Median :0.0000   Median :0.00000      
##  Mean   :201.1        Mean   : 3.498     Mean   :0.1446   Mean   :0.02127      
##  3rd Qu.:245.0        3rd Qu.: 4.000     3rd Qu.:0.0000   3rd Qu.:0.00000      
##  Max.   :310.0        Max.   :10.000     Max.   :1.0000   Max.   :1.00000      
##      dept              salary         
##  Length:14999       Length:14999      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##

# Number of rows and columns in `data`.
dim(data)

## [1] 14999    10

Pembentukan Variabel Ordinal

# Cut points at the 0th, 33rd, 66th and 100th percentiles of satisfaction.
satisfaction_breaks <- quantile(
  data$satisfaction_level,
  probs = c(0, 0.33, 0.66, 1)
)

# Bin satisfaction into three ordered classes. include.lowest keeps the
# minimum value inside the first bin, and ordered_result makes cut() return
# an ordered factor directly.
data$satisfaction_cat <- cut(
  data$satisfaction_level,
  breaks = satisfaction_breaks,
  labels = c("Low", "Medium", "High"),
  include.lowest = TRUE,
  ordered_result = TRUE
)

# Number of employees in each satisfaction category.
table(data$satisfaction_cat)

## 
##    Low Medium   High 
##   4999   5066   4934

# Treat the promotion indicator as a categorical predictor.
data$promotion_last_5years <- as.factor(data$promotion_last_5years)

# Salary band as an ordered factor running low < medium < high.
data$salary <- ordered(data$salary, levels = c("low", "medium", "high"))

Uji Independensi

# Pearson chi-squared test of independence: satisfaction category vs salary.
chisq.test(table(data$satisfaction_cat, data$salary))

## 
##  Pearson's Chi-squared test
## 
## data:  table(data$satisfaction_cat, data$salary)
## X-squared = 72.28, df = 4, p-value = 7.49e-15

# Pearson chi-squared test of independence: satisfaction category vs promotion.
chisq.test(table(data$satisfaction_cat, data$promotion_last_5years))

## 
##  Pearson's Chi-squared test
## 
## data:  table(data$satisfaction_cat, data$promotion_last_5years)
## X-squared = 10.886, df = 2, p-value = 0.004326

# Pearson chi-squared test of independence: satisfaction category vs number of
# projects (each distinct project count is treated as its own category).
chisq.test(table(data$satisfaction_cat, data$number_project))

## 
##  Pearson's Chi-squared test
## 
## data:  table(data$satisfaction_cat, data$number_project)
## X-squared = 5058.7, df = 10, p-value < 2.2e-16

Model Regresi Logistik Ordinal

# Fit the ordinal logistic regression (MASS::polr, default logistic link) of
# the satisfaction category on the workload, tenure, promotion and salary
# predictors.
# NOTE(review): salary is an ordered factor, so it enters through polynomial
# contrasts (reported as salary.L and salary.Q), not per-level dummies.
model <- polr(
  satisfaction_cat ~ 
    number_project +
    average_montly_hours +
    time_spend_company +
    promotion_last_5years +
    salary,
  data = data,
  # Keep the Hessian so summary() can report standard errors without refitting.
  Hess = TRUE
)

# Coefficients, cut-point intercepts, residual deviance and AIC.
summary(model)

## Call:
## polr(formula = satisfaction_cat ~ number_project + average_montly_hours + 
##     time_spend_company + promotion_last_5years + salary, data = data, 
##     Hess = TRUE)
## 
## Coefficients:
##                            Value Std. Error t value
## number_project          0.009999  0.0140636  0.7110
## average_montly_hours    0.004794  0.0003344 14.3355
## time_spend_company     -0.069873  0.0104694 -6.6740
## promotion_last_5years1  0.307199  0.1038422  2.9583
## salary.L                0.163821  0.0395886  4.1381
## salary.Q               -0.025003  0.0294278 -0.8496
## 
## Intercepts:
##             Value   Std. Error t value
## Low|Medium   0.0191  0.0762     0.2503
## Medium|High  1.4490  0.0772    18.7672
## 
## Residual Deviance: 32646.74 
## AIC: 32662.74

Pengujian Signifikansi Parameter

a. Uji Serentak

# Intercept-only model used as the baseline for the overall test.
model_null <- polr(satisfaction_cat ~ 1, data = data)

# Likelihood-ratio test of the baseline against the full model.
anova(model_null, model)

## Likelihood ratio tests of ordinal regression models
## 
## Response: satisfaction_cat
##                                                                                         Model
## 1                                                                                           1
## 2 number_project + average_montly_hours + time_spend_company + promotion_last_5years + salary
##   Resid. df Resid. Dev   Test    Df LR stat. Pr(Chi)
## 1     14997   32954.43                              
## 2     14991   32646.74 1 vs 2     6 307.6848       0

b. Uji Parsial

# Coefficient table (estimate, standard error, test statistic) of the model.
coef_table <- coef(summary(model))

# Two-sided p-values, treating each "t value" as a standard normal statistic.
z_stats <- coef_table[, "t value"]
p_values <- 2 * pnorm(abs(z_stats), lower.tail = FALSE)

# Append the p-values as an extra column and show the table.
coef_table <- cbind(coef_table, "p value" = p_values)

coef_table

##                               Value   Std. Error    t value      p value
## number_project          0.009999052 0.0140636010  0.7109880 4.770917e-01
## average_montly_hours    0.004793702 0.0003343928 14.3355411 1.312198e-46
## time_spend_company     -0.069872563 0.0104694339 -6.6739581 2.489947e-11
## promotion_last_5years1  0.307198535 0.1038422425  2.9583195 3.093213e-03
## salary.L                0.163821425 0.0395885524  4.1381009 3.501923e-05
## salary.Q               -0.025002554 0.0294278391 -0.8496225 3.955350e-01
## Low|Medium              0.019073304 0.0762117164  0.2502673 8.023806e-01
## Medium|High             1.448954921 0.0772066039 18.7672407 1.399691e-78

Odds Ratio

# Exponentiate the model coefficients to express them as odds ratios.
exp(coef(model))

##         number_project   average_montly_hours     time_spend_company 
##              1.0100492              1.0048052              0.9325126 
## promotion_last_5years1               salary.L               salary.Q 
##              1.3596109              1.1780039              0.9753074

Uji Multikolinearitas

# Auxiliary linear fit with the category codes as a numeric response. Only
# its predictor side matters: it lets vif() measure collinearity among the
# same predictors used in the ordinal model.
vif_fit <- lm(
  as.numeric(satisfaction_cat) ~
    number_project +
    average_montly_hours +
    time_spend_company +
    promotion_last_5years +
    salary,
  data = data
)

# (Generalized) variance inflation factor for each predictor term.
vif(vif_fit)

##                           GVIF Df GVIF^(1/(2*Df))
## number_project        1.242729  1        1.114778
## average_montly_hours  1.214001  1        1.101817
## time_spend_company    1.050325  1        1.024854
## promotion_last_5years 1.014353  1        1.007151
## salary                1.012160  2        1.003026

Uji Kebaikan Model

# Log-likelihoods and pseudo-R-squared measures for the fitted model.
pR2(model)

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -1.632337e+04 -1.647721e+04  3.076848e+02  9.336677e-03  2.030472e-02 
##          r2CU 
##  2.284314e-02

Evaluasi Model

# Prediction: most likely satisfaction class for each row of `data`
# (the same rows the model was fitted on).
pred <- predict(model, newdata = data, type = "class")

head(pred)

## [1] Low  High High Low  Low  Low 
## Levels: Low Medium High

# Confusion matrix: observed classes in rows, predicted classes in columns.
table(
  data$satisfaction_cat,
  pred,
  dnn = c("Actual", "Predicted")
)

##         Predicted
## Actual    Low Medium High
##   Low    2834    430 1735
##   Medium 1996    882 2188
##   High   1760   1006 2168

# Accuracy: share of rows whose predicted class equals the observed class.
# Labels are compared as text because the two vectors are different kinds of
# factor.
hits <- as.character(pred) == as.character(data$satisfaction_cat)
accuracy <- mean(hits)
accuracy

## [1] 0.3922928

Modul 4: Analisis Regresi Logistik Ordinal terhadap Faktor-Faktor yang Mempengaruhi Tingkat Kepuasan Karyawan - Analisis Multivariat (2024 B)

Nadine Putri Aulia (24031554094), Putri Sofiyatus Salwa (24031554112), Cintiya Agustin Nareswari (24031554218)

2026-05-01

Load library

Deskripsi Data

Analisis Data

Statistika Deskriptif

Pembentukan Variabel Ordinal

Uji Independensi

Model Regresi Logistik Ordinal

Pengujian Signifikansi Parameter

a. Uji Serentak

b. Uji Parsial

Odds Ratio

Uji Multikolinearitas

Uji Kebaikan Model

Evaluasi Model