Pendahuluan

Dataset: https://www.kaggle.com/datasets/mdmahmudulhasansuzan/students-adaptability-level-in-online-education

Variabel Dependen (Y): Adaptivity_Level — Ordinal dengan tiga kategori: Low < Moderate < High

Variabel Independen (X):

Variabel Tipe Keterangan
Gender Kategorik (Boy / Girl) Jenis kelamin siswa
Age Kategorik (1-5, 6-10, 11-15, 16-20, 21-25, 26-30) Rentang usia siswa
Education_Level Kategorik (School / College / University) Jenjang pendidikan siswa
Location Kategorik (Yes / No) Lokasi tempat tinggal (perkotaan/pedesaan)
Financial_Condition Kategorik (Poor / Mid / Rich) Kondisi finansial keluarga
Load_shedding Kategorik (Low / High) Tingkat pemadaman listrik
Network_Type Kategorik (2G / 3G / 4G) Jenis jaringan internet yang digunakan
Class_Duration Kategorik (0 / 1-3 / 3-6) Durasi kelas online (jam per hari)
Self_Lms Kategorik (No / Yes) Penggunaan LMS mandiri oleh siswa

Tujuan: Memodelkan faktor-faktor yang memengaruhi tingkat adaptasi siswa dalam pendidikan online menggunakan Ordinal Logistic Regression.


Install & Load Package

# install.packages(c("MASS", "brant", "car", "dplyr", "tidyr", "ggplot2"), quiet = TRUE)

library(MASS)
library(brant)
library(car)
library(dplyr)
library(tidyr)
library(ggplot2)

Load Data

df <- read.csv("students_adaptability_level_online_education.csv")
head(df, 10)
Gender Age Education.Level Institution.Type IT.Student Location Load.shedding Financial.Condition Internet.Type Network.Type Class.Duration Self.Lms Device Adaptivity.Level
Boy 21-25 University Non Government No Yes Low Mid Wifi 4G 3-6 No Tab Moderate
Girl 21-25 University Non Government No Yes High Mid Mobile Data 4G 1-3 Yes Mobile Moderate
Girl 16-20 College Government No Yes Low Mid Wifi 4G 1-3 No Mobile Moderate
Girl 11-15 School Non Government No Yes Low Mid Mobile Data 4G 1-3 No Mobile Moderate
Girl 16-20 School Non Government No Yes Low Poor Mobile Data 3G 0 No Mobile Low
Boy 11-15 School Non Government No Yes Low Poor Mobile Data 3G 1-3 No Mobile Low
Boy 11-15 School Non Government No Yes Low Mid Wifi 4G 0 No Mobile Low
Boy 11-15 School Non Government No Yes Low Mid Wifi 4G 1-3 No Mobile Moderate
Boy 16-20 College Government No Yes Low Mid Wifi 4G 1-3 No Mobile Low
Boy 11-15 School Non Government No Yes Low Mid Mobile Data 3G 1-3 No Mobile Moderate

Info data

str(df)
## 'data.frame':    1205 obs. of  14 variables:
##  $ Gender             : chr  "Boy" "Girl" "Girl" "Girl" ...
##  $ Age                : chr  "21-25" "21-25" "16-20" "11-15" ...
##  $ Education.Level    : chr  "University" "University" "College" "School" ...
##  $ Institution.Type   : chr  "Non Government" "Non Government" "Government" "Non Government" ...
##  $ IT.Student         : chr  "No" "No" "No" "No" ...
##  $ Location           : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Load.shedding      : chr  "Low" "High" "Low" "Low" ...
##  $ Financial.Condition: chr  "Mid" "Mid" "Mid" "Mid" ...
##  $ Internet.Type      : chr  "Wifi" "Mobile Data" "Wifi" "Mobile Data" ...
##  $ Network.Type       : chr  "4G" "4G" "4G" "4G" ...
##  $ Class.Duration     : chr  "3-6" "1-3" "1-3" "1-3" ...
##  $ Self.Lms           : chr  "No" "Yes" "No" "No" ...
##  $ Device             : chr  "Tab" "Mobile" "Mobile" "Mobile" ...
##  $ Adaptivity.Level   : chr  "Moderate" "Moderate" "Moderate" "Moderate" ...

Statistika Deskriptif

summary(df)
##     Gender              Age            Education.Level    Institution.Type  
##  Length:1205        Length:1205        Length:1205        Length:1205       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##   IT.Student          Location         Load.shedding      Financial.Condition
##  Length:1205        Length:1205        Length:1205        Length:1205        
##  Class :character   Class :character   Class :character   Class :character   
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character   
##  Internet.Type      Network.Type       Class.Duration       Self.Lms        
##  Length:1205        Length:1205        Length:1205        Length:1205       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##     Device          Adaptivity.Level  
##  Length:1205        Length:1205       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character

Cek Missing Values

colSums(is.na(df))
##              Gender                 Age     Education.Level    Institution.Type 
##                   0                   0                   0                   0 
##          IT.Student            Location       Load.shedding Financial.Condition 
##                   0                   0                   0                   0 
##       Internet.Type        Network.Type      Class.Duration            Self.Lms 
##                   0                   0                   0                   0 
##              Device    Adaptivity.Level 
##                   0                   0

Eksplorasi Data

Distribusi Variabel Kategorik

df %>%
  select(Gender, Age, Education.Level, Location, Financial.Condition,
         Load.shedding, Network.Type, Class.Duration, Self.Lms, Adaptivity.Level) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(x = value)) +
  geom_bar(fill = "orange", color = "white") +
  facet_wrap(~name, scales = "free") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Distribusi Variabel Kategorik", x = NULL, y = "Frekuensi")


Persiapan Data

Pilih Variabel yang digunakan

ordinaldf <- df[, c("Adaptivity.Level", "Gender", "Age", "Education.Level",
                    "Location", "Financial.Condition", "Load.shedding",
                    "Network.Type", "Class.Duration", "Self.Lms")]

# Rename kolom agar lebih rapi
colnames(ordinaldf) <- c("Adaptivity_Level", "Gender", "Age", "Education_Level",
                          "Location", "Financial_Condition", "Load_shedding",
                          "Network_Type", "Class_Duration", "Self_Lms")

Konversi Y ke ordered factor

ordinaldf$Adaptivity_Level <- factor(
  ordinaldf$Adaptivity_Level,
  levels  = c("Low", "Moderate", "High"),
  ordered = TRUE
)

Konversi variabel kategorik ke faktor

ordinaldf$Gender <- factor(
  ordinaldf$Gender,
  levels = c("Boy", "Girl")
)

ordinaldf$Age <- factor(
  ordinaldf$Age,
  levels = c("1-5", "6-10", "11-15", "16-20", "21-25", "26-30")
)

ordinaldf$Education_Level <- factor(
  ordinaldf$Education_Level,
  levels = c("School", "College", "University")
)

ordinaldf$Location <- factor(
  ordinaldf$Location,
  levels = c("No", "Yes")
)

ordinaldf$Financial_Condition <- factor(
  ordinaldf$Financial_Condition,
  levels = c("Poor", "Mid", "Rich")
)

ordinaldf$Load_shedding <- factor(
  ordinaldf$Load_shedding,
  levels = c("Low", "High")
)

ordinaldf$Network_Type <- factor(
  ordinaldf$Network_Type,
  levels = c("2G", "3G", "4G")
)

ordinaldf$Class_Duration <- factor(
  ordinaldf$Class_Duration,
  levels = c("0", "1-3", "3-6")
)

ordinaldf$Self_Lms <- factor(
  ordinaldf$Self_Lms,
  levels = c("No", "Yes")
)

cat("Dimensi data:", dim(ordinaldf), "\n")
## Dimensi data: 1205 10
head(ordinaldf)
Adaptivity_Level Gender Age Education_Level Location Financial_Condition Load_shedding Network_Type Class_Duration Self_Lms
Moderate Boy 21-25 University Yes Mid Low 4G 3-6 No
Moderate Girl 21-25 University Yes Mid High 4G 1-3 Yes
Moderate Girl 16-20 College Yes Mid Low 4G 1-3 No
Moderate Girl 11-15 School Yes Mid Low 4G 1-3 No
Low Girl 16-20 School Yes Poor Low 3G 0 No
Low Boy 11-15 School Yes Poor Low 3G 1-3 No

Uji Asumsi

Catatan: Dalam Ordinal Logistic Regression, asumsi yang wajib dipenuhi adalah Tidak Ada Multikolinearitas. Asumsi lainnya (variabel dependen ordinal, independensi observasi, dan tidak ada outlier) bersifat opsional diujikan sebagai kelengkapan analisis, namun tidak menjadi syarat mutlak.

Asumsi 1: Variabel Dependen Bersifat Ordinal

Asumsi ini mensyaratkan bahwa variabel dependen harus berupa data ordinal, yaitu data kategorik yang memiliki urutan atau tingkatan yang bermakna antar kategorinya.

cat("Class:", class(ordinaldf$Adaptivity_Level), "\n")
## Class: ordered factor
cat("Is ordered:", is.ordered(ordinaldf$Adaptivity_Level), "\n")
## Is ordered: TRUE
cat("Levels:", paste(levels(ordinaldf$Adaptivity_Level), collapse = " < "), "\n\n")
## Levels: Low < Moderate < High
tbl <- table(ordinaldf$Adaptivity_Level)
data.frame(
  Kategori  = names(tbl),
  Frekuensi = as.integer(tbl),
  Proporsi  = paste0(round(prop.table(tbl) * 100, 2), "%")
)
Kategori Frekuensi Proporsi
Low 480 39.83%
Moderate 625 51.87%
High 100 8.3%

TerpenuhiAdaptivity_Level merupakan ordered factor dengan urutan Low < Moderate < High. Variabel ini mencerminkan tingkatan adaptasi siswa, sehingga memenuhi syarat sebagai variabel dependen ordinal.


Asumsi 2: Independensi Observasi

Asumsi ini mensyaratkan bahwa setiap observasi (baris data) harus bersifat independen satu sama lain, artinya nilai dari satu pengamatan tidak memengaruhi atau bergantung pada pengamatan lainnya.

Tidak dapat dipastikan sepenuhnya Secara teknis tidak ditemukan baris yang identik sempurna. Namun, dataset ini berasal dari survei siswa tanpa ID unik per individu, sehingga independensi antar responden diasumsikan terpenuhi berdasarkan desain pengumpulan data. Dalam praktik, asumsi ini umumnya dipenuhi selama tidak ada pengukuran berulang pada subjek yang sama.


Asumsi 3: Tidak Ada Multikolinearitas

Multikolinearitas terjadi ketika dua atau lebih variabel independen memiliki korelasi yang tinggi satu sama lain. Kondisi ini dapat menyebabkan estimasi koefisien menjadi tidak stabil dan interpretasi menjadi sulit.

Karena semua variabel dalam model ini bersifat kategorik (dengan lebih dari 2 kategori), fungsi vif() dari package car secara otomatis menghitung Generalized VIF (GVIF). Nilai GVIF mentah tidak dapat dibandingkan langsung dengan threshold 5 atau 10, karena nilainya dipengaruhi oleh jumlah derajat bebas (Df) tiap variabel. Nilai yang tepat untuk dievaluasi adalah kolom GVIF^(1/(2*Df)) yaitu nilai yang sudah disesuaikan dan setara dengan √VIF pada variabel numerik.

Kriteria yang digunakan:

Nilai GVIF^(1/(2*Df)) Interpretasi
< √5 ≈ 2.236 Aman, tidak ada multikolinearitas
√5 s.d. √10 (2.236 – 3.162) Peringatan, perlu diperhatikan
> √10 ≈ 3.162 Multikolinearitas serius
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)

model_ols <- lm(
  Y_num ~ Gender + Age + Education_Level + Location +
          Financial_Condition + Load_shedding +
          Network_Type + Class_Duration + Self_Lms,
  data = ordinaldf
)

# Tampilkan tabel GVIF lengkap
gvif_full <- vif(model_ols)
print(round(gvif_full, 4))
##                        GVIF Df GVIF^(1/(2*Df))
## Gender               1.1447  1          1.0699
## Age                 36.0697  5          1.4312
## Education_Level     27.4979  2          2.2899
## Location             1.3698  1          1.1704
## Financial_Condition  1.2880  2          1.0653
## Load_shedding        1.2545  1          1.1201
## Network_Type         1.4492  2          1.0972
## Class_Duration       1.5466  2          1.1152
## Self_Lms             1.5296  1          1.2368
# Ekstrak kolom GVIF^(1/(2*Df)) — kolom ke-3
gvif_adj <- gvif_full[, 3]

cat("Nilai tertinggi :", round(max(gvif_adj), 4), "\n")
## Nilai tertinggi : 2.2899
cat("Nilai terendah  :", round(min(gvif_adj), 4), "\n")
## Nilai terendah  : 1.0653
cat("Threshold aman  : <", round(sqrt(5),  4), "(setara VIF < 5)\n")
## Threshold aman  : < 2.2361 (setara VIF < 5)
cat("Threshold kritis: <", round(sqrt(10), 4), "(setara VIF < 10)\n")
## Threshold kritis: < 3.1623 (setara VIF < 10)
ordinaldf$Y_num <- NULL
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)
model_ols_viz <- lm(
  Y_num ~ Gender + Age + Education_Level + Location +
          Financial_Condition + Load_shedding +
          Network_Type + Class_Duration + Self_Lms,
  data = ordinaldf
)
gvif_vals <- vif(model_ols_viz)
ordinaldf$Y_num <- NULL

gvif_adj_vals <- gvif_vals[, 3]

thr_warn <- sqrt(5)   # ≈ 2.236
thr_crit <- sqrt(10)  # ≈ 3.162

vif_df <- data.frame(
  Variabel = rownames(gvif_vals),
  GVIF_adj = as.numeric(gvif_adj_vals)
)

ggplot(vif_df, aes(x = reorder(Variabel, GVIF_adj),
                   y = GVIF_adj,
                   fill = GVIF_adj > thr_warn)) +
  geom_col(show.legend = FALSE) +
  geom_hline(yintercept = thr_warn, linetype = "dashed", color = "orange", linewidth = 0.8) +
  geom_hline(yintercept = thr_crit, linetype = "dashed", color = "red",    linewidth = 0.8) +
  annotate("text", x = 0.6, y = thr_warn + 0.03,
           label = paste0("\u221a5 = ", round(thr_warn, 3), " (peringatan)"),
           color = "orange", hjust = 0, size = 3.5, vjust = 0) +
  annotate("text", x = 0.6, y = thr_crit + 0.03,
           label = paste0("\u221a10 = ", round(thr_crit, 3), " (kritis)"),
           color = "red", hjust = 0, size = 3.5, vjust = 0) +
  scale_fill_manual(values = c("FALSE" = "steelblue", "TRUE" = "tomato")) +
  coord_flip() +
  theme_minimal(base_size = 12) +
  labs(title = "Generalized VIF — GVIF\u00b9\u141f\u00b2\u1d30\u1da0 per Variabel",
       subtitle = "Nilai yang tepat untuk variabel kategorik (setara \u221aVIF pada numerik)",
       x = "Variabel", y = "GVIF^(1/(2*Df))")

Terpenuhi — Sseuruh variabel berada di bawah threshold kritis √10 ≈ 3.162. Nilai GVIF mentah untuk Age dan Education_Level tampak besar karena memiliki banyak kategori, namun masih dibawah threshold, sehingga tidak terdapat multikolinearitas yang serius. Ini adalah asumsi wajib dan terkonfirmasi terpenuhi.


Asumsi 4: Tidak Ada Outlier Ekstrem

Outlier ekstrem dapat memengaruhi estimasi model secara signifikan. Karena semua variabel dalam model ini bersifat kategorik, deteksi outlier dilakukan menggunakan Cook’s Distance pada model OLS bantu, observasi dengan Cook’s Distance > 4/n dianggap berpengaruh besar.

ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)

model_ols2 <- lm(
  Y_num ~ Gender + Age + Education_Level + Location +
          Financial_Condition + Load_shedding +
          Network_Type + Class_Duration + Self_Lms,
  data = ordinaldf
)
n         <- nrow(ordinaldf)
cooksd    <- cooks.distance(model_ols2)
threshold <- 4 / n

data.frame(
  Threshold       = round(threshold, 6),
  N_Berpengaruh   = sum(cooksd > threshold, na.rm = TRUE),
  Pct_Berpengaruh = paste0(round(mean(cooksd > threshold, na.rm = TRUE) * 100, 2), "%"),
  Cook_Max        = round(max(cooksd, na.rm = TRUE), 6)
)
Threshold N_Berpengaruh Pct_Berpengaruh Cook_Max
0.00332 72 5.98% 0.021356
ordinaldf$Y_num <- NULL
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)
model_ols3 <- lm(
  Y_num ~ Gender + Age + Education_Level + Location +
          Financial_Condition + Load_shedding +
          Network_Type + Class_Duration + Self_Lms,
  data = ordinaldf
)
n3      <- nrow(ordinaldf)
cooksd3 <- cooks.distance(model_ols3)
thresh3 <- 4 / n3
ordinaldf$Y_num <- NULL

plot(cooksd3, type = "h",
     main = "Cook's Distance untuk Deteksi Outlier / Influential Observation",
     ylab = "Cook's Distance",
     xlab = "Indeks Observasi",
     col  = ifelse(cooksd3 > thresh3, "tomato", "steelblue"))
abline(h = thresh3, col = "red", lwd = 2, lty = 2)
legend("topright",
       legend = c("Normal", "Berpengaruh (> 4/n)", "Threshold (4/n)"),
       col    = c("steelblue", "tomato", "red"),
       lty    = c(1, 1, 2),
       lwd    = 2,
       cex    = 0.85)

Perlu perhatian — Terdapat sejumlah observasi yang melampaui threshold Cook’s Distance (4/n). Hal ini wajar terjadi pada data kategorik dengan variasi nilai yang terbatas, karena observasi dalam kelompok kecil cenderung memiliki leverage lebih tinggi. Selama proporsinya tidak terlalu besar dan tidak ada satu titik pun yang nilainya sangat jauh melampaui yang lain, model masih dapat digunakan dan diinterpretasikan.


Estimasi Model

Fit Model Ordinal Logistic Regression

model_polr <- polr(
  Adaptivity_Level ~ Gender + Age + Education_Level + Location +
                     Financial_Condition + Load_shedding +
                     Network_Type + Class_Duration + Self_Lms,
  data   = ordinaldf,
  method = "logistic",
  Hess   = TRUE
)

Summary Model

summary(model_polr)
## Call:
## polr(formula = Adaptivity_Level ~ Gender + Age + Education_Level + 
##     Location + Financial_Condition + Load_shedding + Network_Type + 
##     Class_Duration + Self_Lms, data = ordinaldf, Hess = TRUE, 
##     method = "logistic")
## 
## Coefficients:
##                              Value Std. Error t value
## GenderGirl                -0.25837     0.1374 -1.8805
## Age6-10                    0.71599     0.4554  1.5721
## Age11-15                   0.04194     0.2854  0.1469
## Age16-20                  -0.96627     0.4350 -2.2212
## Age21-25                  -0.28343     0.5467 -0.5185
## Age26-30                   0.15845     0.6200  0.2556
## Education_LevelCollege    -0.30506     0.3882 -0.7857
## Education_LevelUniversity -0.41020     0.4778 -0.8586
## LocationYes                0.27542     0.1821  1.5122
## Financial_ConditionMid     0.41240     0.1794  2.2989
## Financial_ConditionRich    2.66027     0.3142  8.4662
## Load_sheddingHigh         -0.08123     0.1943 -0.4182
## Network_Type3G             1.51917     0.6821  2.2272
## Network_Type4G             2.40784     0.6847  3.5166
## Class_Duration1-3          3.39972     0.3560  9.5506
## Class_Duration3-6          3.82514     0.3889  9.8347
## Self_LmsYes                1.31377     0.2084  6.3028
## 
## Intercepts:
##               Value   Std. Error t value
## Low|Moderate   4.9778  0.8274     6.0162
## Moderate|High  8.6140  0.8465    10.1763
## 
## Residual Deviance: 1722.046 
## AIC: 1760.046
model_null <- polr(
  Adaptivity_Level ~ 1,
  data   = ordinaldf,
  method = "logistic",
  Hess   = TRUE
)

Uji Serentak — Likelihood Ratio Test

H₀: Semua koefisien prediktor = 0
H₁: Minimal satu koefisien ≠ 0
Tolak H₀ jika p-value < 0.05

LL_full <- as.numeric(logLik(model_polr))
LL_null <- as.numeric(logLik(model_null))
G2      <- -2 * (LL_null - LL_full)
df_lrt  <- length(coef(model_polr))
p_lrt   <- pchisq(G2, df = df_lrt, lower.tail = FALSE)

data.frame(
  Statistik      = round(G2, 4),
  df             = df_lrt,
  p_value        = format(p_lrt, scientific = TRUE, digits = 4),
  Keputusan      = ifelse(p_lrt < 0.05, "TOLAK H0", "GAGAL TOLAK H0"),
  Kesimpulan     = ifelse(p_lrt < 0.05,
    "Model signifikan secara serentak",
    "Model tidak signifikan secara serentak")
)
Statistik df p_value Keputusan Kesimpulan
480.0021 17 3.069e-91 TOLAK H0 Model signifikan secara serentak

Uji Parsial — Wald Test

H₀: Koefisien variabel ke-j = 0
H₁: Koefisien variabel ke-j ≠ 0
Tolak H₀ jika p-value < 0.05

coef_tbl <- coef(summary(model_polr))
z_vals   <- coef_tbl[, "t value"]
p_vals   <- 2 * pnorm(abs(z_vals), lower.tail = FALSE)

wald_result <- data.frame(
  Variabel  = rownames(coef_tbl),
  Koefisien = round(coef_tbl[, "Value"],      4),
  Std_Error = round(coef_tbl[, "Std. Error"], 4),
  z_value   = round(z_vals, 4),
  p_value   = round(p_vals, 6),
  Sig       = case_when(
    p_vals < 0.001 ~ "***",
    p_vals < 0.01  ~ "**",
    p_vals < 0.05  ~ "*",
    p_vals < 0.1   ~ ".",
    TRUE           ~ "ns"
  )
)
wald_result
Variabel Koefisien Std_Error z_value p_value Sig
GenderGirl GenderGirl -0.2584 0.1374 -1.8805 0.060039 .
Age6-10 Age6-10 0.7160 0.4554 1.5721 0.115920 ns
Age11-15 Age11-15 0.0419 0.2854 0.1469 0.883182 ns
Age16-20 Age16-20 -0.9663 0.4350 -2.2212 0.026340 *
Age21-25 Age21-25 -0.2834 0.5467 -0.5185 0.604138 ns
Age26-30 Age26-30 0.1585 0.6200 0.2556 0.798289 ns
Education_LevelCollege Education_LevelCollege -0.3051 0.3882 -0.7857 0.432015 ns
Education_LevelUniversity Education_LevelUniversity -0.4102 0.4778 -0.8586 0.390583 ns
LocationYes LocationYes 0.2754 0.1821 1.5122 0.130484 ns
Financial_ConditionMid Financial_ConditionMid 0.4124 0.1794 2.2989 0.021511 *
Financial_ConditionRich Financial_ConditionRich 2.6603 0.3142 8.4662 0.000000 ***
Load_sheddingHigh Load_sheddingHigh -0.0812 0.1943 -0.4182 0.675830 ns
Network_Type3G Network_Type3G 1.5192 0.6821 2.2272 0.025937 *
Network_Type4G Network_Type4G 2.4078 0.6847 3.5166 0.000437 ***
Class_Duration1-3 Class_Duration1-3 3.3997 0.3560 9.5506 0.000000 ***
Class_Duration3-6 Class_Duration3-6 3.8251 0.3889 9.8347 0.000000 ***
Self_LmsYes Self_LmsYes 1.3138 0.2084 6.3028 0.000000 ***
Low|Moderate Low|Moderate 4.9778 0.8274 6.0162 0.000000 ***
Moderate|High Moderate|High 8.6140 0.8465 10.1763 0.000000 ***

Goodness of Fit

McFadden R² Interpretasi
0.00 – 0.10 Lemah
0.10 – 0.20 Cukup
0.20 – 0.40 Baik
> 0.40 Sangat Baik
n           <- nrow(ordinaldf)
r2_mcfadden <- 1 - (LL_full / LL_null)
r2_cox      <- 1 - exp((2 / n) * (LL_null - LL_full))
r2_nag      <- r2_cox / (1 - exp((2 / n) * LL_null))

data.frame(
  Metrik = c("McFadden Pseudo R²", "Cox & Snell R²", "Nagelkerke R²",
             "AIC Full Model", "AIC Null Model", "Residual Deviance"),
  Nilai  = round(c(r2_mcfadden, r2_cox, r2_nag,
                   AIC(model_polr), AIC(model_null),
                   model_polr$deviance), 4)
)
Metrik Nilai
McFadden Pseudo R² 0.2180
Cox & Snell R² 0.3286
Nagelkerke R² 0.3915
AIC Full Model 1760.0456
AIC Null Model 2206.0477
Residual Deviance 1722.0456

Prediksi

Prediksi Kelas & Probabilitas

ordinaldf$pred_class <- predict(model_polr, newdata = ordinaldf, type = "class")

head(data.frame(
  Aktual   = ordinaldf$Adaptivity_Level,
  Prediksi = ordinaldf$pred_class
), 10)
Aktual Prediksi
Moderate Moderate
Moderate Moderate
Moderate Low
Moderate Moderate
Low Low
Low Moderate
Low Low
Moderate Moderate
Low Moderate
Moderate Moderate
prob_pred <- predict(model_polr, newdata = ordinaldf, type = "probs")
head(round(prob_pred, 4), 10)
##       Low Moderate   High
## 1  0.2228   0.6930 0.0842
## 2  0.1421   0.7206 0.1373
## 3  0.5030   0.4716 0.0254
## 4  0.2140   0.6978 0.0883
## 5  0.9880   0.0117 0.0003
## 6  0.4357   0.5313 0.0330
## 7  0.8630   0.1329 0.0042
## 8  0.1737   0.7149 0.1114
## 9  0.4387   0.5286 0.0326
## 10 0.3383   0.6127 0.0490

Evaluasi Model

Confusion Matrix

cm <- table(Aktual = ordinaldf$Adaptivity_Level, Prediksi = ordinaldf$pred_class)
print(cm)
##           Prediksi
## Aktual     Low Moderate High
##   Low      275      202    3
##   Moderate  75      545    5
##   High       0       84   16

Akurasi & Metrik Per-Kelas

akurasi <- sum(diag(cm)) / sum(cm)
cat(sprintf("Akurasi Model: %.2f%%\n\n", akurasi * 100))
## Akurasi Model: 69.38%
metrics <- do.call(rbind, lapply(rownames(cm), function(kelas) {
  tp   <- cm[kelas, kelas]
  fp   <- sum(cm[, kelas]) - tp
  fn   <- sum(cm[kelas, ]) - tp
  prec <- ifelse((tp + fp) > 0, tp / (tp + fp), NA)
  rec  <- ifelse((tp + fn) > 0, tp / (tp + fn), NA)
  data.frame(Kelas = kelas, Precision = round(prec, 3), Recall = round(rec, 3))
}))
print(metrics)
##      Kelas Precision Recall
## 1      Low     0.786  0.573
## 2 Moderate     0.656  0.872
## 3     High     0.667  0.160

Visualisasi Confusion Matrix

cm_df <- as.data.frame(cm)
ggplot(cm_df, aes(x = Prediksi, y = Aktual, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(aes(label = Freq), size = 7, fontface = "bold") +
  scale_fill_gradient(low = "#f0f7ff", high = "#2171b5") +
  theme_minimal(base_size = 13) +
  labs(title = "Confusion Matrix — Ordinal Logistic Regression",
       x = "Prediksi", y = "Aktual", fill = "Jumlah")


Interpretasi

Odds Ratio

or_vals <- exp(coef(model_polr))
se_vals <- sqrt(diag(vcov(model_polr))[names(coef(model_polr))])

or_tbl <- data.frame(
  Variabel   = names(or_vals),
  OR         = round(or_vals, 4),
  CI_Lower   = round(exp(coef(model_polr) - 1.96 * se_vals), 4),
  CI_Upper   = round(exp(coef(model_polr) + 1.96 * se_vals), 4)
)
or_tbl
Variabel OR CI_Lower CI_Upper
GenderGirl GenderGirl 0.7723 0.5900 1.0110
Age6-10 Age6-10 2.0462 0.8381 4.9959
Age11-15 Age11-15 1.0428 0.5960 1.8246
Age16-20 Age16-20 0.3805 0.1622 0.8926
Age21-25 Age21-25 0.7532 0.2580 2.1991
Age26-30 Age26-30 1.1717 0.3476 3.9499
Education_LevelCollege Education_LevelCollege 0.7371 0.3444 1.5776
Education_LevelUniversity Education_LevelUniversity 0.6635 0.2601 1.6926
LocationYes LocationYes 1.3171 0.9217 1.8821
Financial_ConditionMid Financial_ConditionMid 1.5104 1.0627 2.1468
Financial_ConditionRich Financial_ConditionRich 14.3002 7.7245 26.4737
Load_sheddingHigh Load_sheddingHigh 0.9220 0.6300 1.3492
Network_Type3G Network_Type3G 4.5684 1.1999 17.3936
Network_Type4G Network_Type4G 11.1099 2.9031 42.5158
Class_Duration1-3 Class_Duration1-3 29.9557 14.9099 60.1847
Class_Duration3-6 Class_Duration3-6 45.8393 21.3876 98.2456
Self_LmsYes Self_LmsYes 3.7202 2.4725 5.5975

Forest Plot Odds Ratio

ggplot(or_tbl, aes(x = OR, y = reorder(Variabel, OR))) +
  geom_point(size = 3.5, color = "steelblue") +
  geom_errorbarh(aes(xmin = CI_Lower, xmax = CI_Upper), height = 0.25, color = "steelblue") +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red", linewidth = 0.8) +
  theme_minimal(base_size = 12) +
  labs(title = "Odds Ratio dengan 95% Confidence Interval",
       subtitle = "Garis merah = OR 1 (tidak ada efek)",
       x = "Odds Ratio", y = "Variabel")

Pengaruh Class Duration terhadap Probabilitas Adaptivity Level

# Buat grid nilai Class_Duration
grid <- data.frame(
  Class_Duration    = factor(c("0", "1-3", "3-6"), levels = c("0", "1-3", "3-6")),
  Gender            = factor("Boy", levels = c("Boy", "Girl")),
  Age               = factor("21-25", levels = c("1-5", "6-10", "11-15", "16-20", "21-25", "26-30")),
  Education_Level   = factor("University", levels = c("School", "College", "University")),
  Location          = factor("Yes", levels = c("No", "Yes")),
  Financial_Condition = factor("Mid", levels = c("Poor", "Mid", "Rich")),
  Load_shedding     = factor("Low", levels = c("Low", "High")),
  Network_Type      = factor("4G", levels = c("2G", "3G", "4G")),
  Self_Lms          = factor("No", levels = c("No", "Yes"))
)

# Prediksi probabilitas
prob <- predict(model_polr, newdata = grid, type = "probs")

# Gabungkan ke data
prob_df <- cbind(grid, prob)

# Ubah ke long format
prob_long <- prob_df %>%
  pivot_longer(cols = c("Low", "Moderate", "High"),
               names_to = "Kategori",
               values_to = "Probabilitas")

# Plot
ggplot(prob_long, aes(x = Class_Duration, y = Probabilitas, fill = Kategori)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal(base_size = 13) +
  labs(
    title = "Pengaruh Durasi Kelas terhadap Probabilitas Tingkat Adaptasi",
    x = "Class Duration (jam/hari)",
    y = "Probabilitas",
    fill = "Kategori"
  )

## Buat grid nilai Class_Duration
grid <- data.frame(
  Class_Duration = factor(c("0", "1-3", "3-6"),
                          levels = c("0", "1-3", "3-6")),
  Gender = factor("Boy", levels = c("Boy", "Girl")),
  Age = factor("21-25", levels = levels(ordinaldf$Age)),
  Education_Level = factor("University", levels = levels(ordinaldf$Education_Level)),
  Location = factor("Yes", levels = levels(ordinaldf$Location)),
  Financial_Condition = factor("Mid", levels = levels(ordinaldf$Financial_Condition)),
  Load_shedding = factor("Low", levels = levels(ordinaldf$Load_shedding)),
  Network_Type = factor("4G", levels = levels(ordinaldf$Network_Type)),
  Self_Lms = factor("No", levels = levels(ordinaldf$Self_Lms))
)

## Prediksi probabilitas
prob <- predict(model_polr, newdata = grid, type = "probs")

## Gabungkan
prob_df <- cbind(grid, prob)

## Long format
library(tidyr)
prob_long <- prob_df %>%
  pivot_longer(cols = c("Low", "Moderate", "High"),
               names_to = "Kategori",
               values_to = "Probabilitas")

## Plot garis (INI YANG MELENGKUNG)
library(ggplot2)

ggplot(prob_long, aes(x = Class_Duration,
                      y = Probabilitas,
                      group = Kategori,
                      color = Kategori)) +
  geom_line(size = 1.5) +
  geom_point(size = 3) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Kurva Probabilitas Ordinal Logistic Regression",
    subtitle = "Pengaruh Class Duration terhadap Adaptivity Level",
    x = "Class Duration",
    y = "Probabilitas",
    color = "Kategori"
  )