Dataset: https://www.kaggle.com/datasets/mdmahmudulhasansuzan/students-adaptability-level-in-online-education
Variabel Dependen (Y): Adaptivity_Level
— Ordinal dengan tiga kategori:
Low < Moderate < High
Variabel Independen (X):
| Variabel | Tipe | Keterangan |
|---|---|---|
Gender |
Kategorik (Boy / Girl) | Jenis kelamin siswa |
Age |
Kategorik (1-5, 6-10, 11-15, 16-20, 21-25, 26-30) | Rentang usia siswa |
Education_Level |
Kategorik (School / College / University) | Jenjang pendidikan siswa |
Location |
Kategorik (Yes / No) | Lokasi tempat tinggal (perkotaan/pedesaan) |
Financial_Condition |
Kategorik (Poor / Mid / Rich) | Kondisi finansial keluarga |
Load_shedding |
Kategorik (Low / High) | Tingkat pemadaman listrik |
Network_Type |
Kategorik (2G / 3G / 4G) | Jenis jaringan internet yang digunakan |
Class_Duration |
Kategorik (0 / 1-3 / 3-6) | Durasi kelas online (jam per hari) |
Self_Lms |
Kategorik (No / Yes) | Penggunaan LMS mandiri oleh siswa |
Tujuan: Memodelkan faktor-faktor yang memengaruhi tingkat adaptasi siswa dalam pendidikan online menggunakan Ordinal Logistic Regression.
# install.packages(c("MASS", "brant", "car", "dplyr", "tidyr", "ggplot2"), quiet = TRUE)
library(MASS)
library(brant)
library(car)
library(dplyr)
library(tidyr)
library(ggplot2)| Gender | Age | Education.Level | Institution.Type | IT.Student | Location | Load.shedding | Financial.Condition | Internet.Type | Network.Type | Class.Duration | Self.Lms | Device | Adaptivity.Level |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Boy | 21-25 | University | Non Government | No | Yes | Low | Mid | Wifi | 4G | 3-6 | No | Tab | Moderate |
| Girl | 21-25 | University | Non Government | No | Yes | High | Mid | Mobile Data | 4G | 1-3 | Yes | Mobile | Moderate |
| Girl | 16-20 | College | Government | No | Yes | Low | Mid | Wifi | 4G | 1-3 | No | Mobile | Moderate |
| Girl | 11-15 | School | Non Government | No | Yes | Low | Mid | Mobile Data | 4G | 1-3 | No | Mobile | Moderate |
| Girl | 16-20 | School | Non Government | No | Yes | Low | Poor | Mobile Data | 3G | 0 | No | Mobile | Low |
| Boy | 11-15 | School | Non Government | No | Yes | Low | Poor | Mobile Data | 3G | 1-3 | No | Mobile | Low |
| Boy | 11-15 | School | Non Government | No | Yes | Low | Mid | Wifi | 4G | 0 | No | Mobile | Low |
| Boy | 11-15 | School | Non Government | No | Yes | Low | Mid | Wifi | 4G | 1-3 | No | Mobile | Moderate |
| Boy | 16-20 | College | Government | No | Yes | Low | Mid | Wifi | 4G | 1-3 | No | Mobile | Low |
| Boy | 11-15 | School | Non Government | No | Yes | Low | Mid | Mobile Data | 3G | 1-3 | No | Mobile | Moderate |
## 'data.frame': 1205 obs. of 14 variables:
## $ Gender : chr "Boy" "Girl" "Girl" "Girl" ...
## $ Age : chr "21-25" "21-25" "16-20" "11-15" ...
## $ Education.Level : chr "University" "University" "College" "School" ...
## $ Institution.Type : chr "Non Government" "Non Government" "Government" "Non Government" ...
## $ IT.Student : chr "No" "No" "No" "No" ...
## $ Location : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Load.shedding : chr "Low" "High" "Low" "Low" ...
## $ Financial.Condition: chr "Mid" "Mid" "Mid" "Mid" ...
## $ Internet.Type : chr "Wifi" "Mobile Data" "Wifi" "Mobile Data" ...
## $ Network.Type : chr "4G" "4G" "4G" "4G" ...
## $ Class.Duration : chr "3-6" "1-3" "1-3" "1-3" ...
## $ Self.Lms : chr "No" "Yes" "No" "No" ...
## $ Device : chr "Tab" "Mobile" "Mobile" "Mobile" ...
## $ Adaptivity.Level : chr "Moderate" "Moderate" "Moderate" "Moderate" ...
## Gender Age Education.Level Institution.Type
## Length:1205 Length:1205 Length:1205 Length:1205
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## IT.Student Location Load.shedding Financial.Condition
## Length:1205 Length:1205 Length:1205 Length:1205
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Internet.Type Network.Type Class.Duration Self.Lms
## Length:1205 Length:1205 Length:1205 Length:1205
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Device Adaptivity.Level
## Length:1205 Length:1205
## Class :character Class :character
## Mode :character Mode :character
df %>%
select(Gender, Age, Education.Level, Location, Financial.Condition,
Load.shedding, Network.Type, Class.Duration, Self.Lms, Adaptivity.Level) %>%
pivot_longer(cols = everything()) %>%
ggplot(aes(x = value)) +
geom_bar(fill = "orange", color = "white") +
facet_wrap(~name, scales = "free") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Distribusi Variabel Kategorik", x = NULL, y = "Frekuensi")ordinaldf <- df[, c("Adaptivity.Level", "Gender", "Age", "Education.Level",
"Location", "Financial.Condition", "Load.shedding",
"Network.Type", "Class.Duration", "Self.Lms")]
# Rename kolom agar lebih rapi
colnames(ordinaldf) <- c("Adaptivity_Level", "Gender", "Age", "Education_Level",
"Location", "Financial_Condition", "Load_shedding",
"Network_Type", "Class_Duration", "Self_Lms")ordinaldf$Gender <- factor(
ordinaldf$Gender,
levels = c("Boy", "Girl")
)
ordinaldf$Age <- factor(
ordinaldf$Age,
levels = c("1-5", "6-10", "11-15", "16-20", "21-25", "26-30")
)
ordinaldf$Education_Level <- factor(
ordinaldf$Education_Level,
levels = c("School", "College", "University")
)
ordinaldf$Location <- factor(
ordinaldf$Location,
levels = c("No", "Yes")
)
ordinaldf$Financial_Condition <- factor(
ordinaldf$Financial_Condition,
levels = c("Poor", "Mid", "Rich")
)
ordinaldf$Load_shedding <- factor(
ordinaldf$Load_shedding,
levels = c("Low", "High")
)
ordinaldf$Network_Type <- factor(
ordinaldf$Network_Type,
levels = c("2G", "3G", "4G")
)
ordinaldf$Class_Duration <- factor(
ordinaldf$Class_Duration,
levels = c("0", "1-3", "3-6")
)
ordinaldf$Self_Lms <- factor(
ordinaldf$Self_Lms,
levels = c("No", "Yes")
)
cat("Dimensi data:", dim(ordinaldf), "\n")## Dimensi data: 1205 10
| Adaptivity_Level | Gender | Age | Education_Level | Location | Financial_Condition | Load_shedding | Network_Type | Class_Duration | Self_Lms |
|---|---|---|---|---|---|---|---|---|---|
| Moderate | Boy | 21-25 | University | Yes | Mid | Low | 4G | 3-6 | No |
| Moderate | Girl | 21-25 | University | Yes | Mid | High | 4G | 1-3 | Yes |
| Moderate | Girl | 16-20 | College | Yes | Mid | Low | 4G | 1-3 | No |
| Moderate | Girl | 11-15 | School | Yes | Mid | Low | 4G | 1-3 | No |
| Low | Girl | 16-20 | School | Yes | Poor | Low | 3G | 0 | No |
| Low | Boy | 11-15 | School | Yes | Poor | Low | 3G | 1-3 | No |
Catatan: Dalam Ordinal Logistic Regression, asumsi yang wajib dipenuhi adalah Tidak Ada Multikolinearitas. Asumsi lainnya (variabel dependen ordinal, independensi observasi, dan tidak ada outlier) bersifat opsional diujikan sebagai kelengkapan analisis, namun tidak menjadi syarat mutlak.
Asumsi ini mensyaratkan bahwa variabel dependen harus berupa data ordinal, yaitu data kategorik yang memiliki urutan atau tingkatan yang bermakna antar kategorinya.
## Class: ordered factor
## Is ordered: TRUE
## Levels: Low < Moderate < High
tbl <- table(ordinaldf$Adaptivity_Level)
data.frame(
Kategori = names(tbl),
Frekuensi = as.integer(tbl),
Proporsi = paste0(round(prop.table(tbl) * 100, 2), "%")
)| Kategori | Frekuensi | Proporsi |
|---|---|---|
| Low | 480 | 39.83% |
| Moderate | 625 | 51.87% |
| High | 100 | 8.3% |
Terpenuhi —
Adaptivity_Levelmerupakan ordered factor dengan urutanLow < Moderate < High. Variabel ini mencerminkan tingkatan adaptasi siswa, sehingga memenuhi syarat sebagai variabel dependen ordinal.
Asumsi ini mensyaratkan bahwa setiap observasi (baris data) harus bersifat independen satu sama lain, artinya nilai dari satu pengamatan tidak memengaruhi atau bergantung pada pengamatan lainnya.
Tidak dapat dipastikan sepenuhnya Secara teknis tidak ditemukan baris yang identik sempurna. Namun, dataset ini berasal dari survei siswa tanpa ID unik per individu, sehingga independensi antar responden diasumsikan terpenuhi berdasarkan desain pengumpulan data. Dalam praktik, asumsi ini umumnya dipenuhi selama tidak ada pengukuran berulang pada subjek yang sama.
Multikolinearitas terjadi ketika dua atau lebih variabel independen memiliki korelasi yang tinggi satu sama lain. Kondisi ini dapat menyebabkan estimasi koefisien menjadi tidak stabil dan interpretasi menjadi sulit.
Karena semua variabel dalam model ini bersifat
kategorik (dengan lebih dari 2 kategori), fungsi
vif() dari package car secara otomatis
menghitung Generalized VIF (GVIF). Nilai GVIF mentah
tidak dapat dibandingkan langsung dengan threshold 5
atau 10, karena nilainya dipengaruhi oleh jumlah derajat bebas (Df) tiap
variabel. Nilai yang tepat untuk dievaluasi adalah kolom
GVIF^(1/(2*Df)) yaitu nilai yang sudah
disesuaikan dan setara dengan √VIF pada variabel numerik.
Kriteria yang digunakan:
Nilai GVIF^(1/(2*Df)) |
Interpretasi |
|---|---|
| < √5 ≈ 2.236 | Aman, tidak ada multikolinearitas |
| √5 s.d. √10 (2.236 – 3.162) | Peringatan, perlu diperhatikan |
| > √10 ≈ 3.162 | Multikolinearitas serius |
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)
model_ols <- lm(
Y_num ~ Gender + Age + Education_Level + Location +
Financial_Condition + Load_shedding +
Network_Type + Class_Duration + Self_Lms,
data = ordinaldf
)
# Tampilkan tabel GVIF lengkap
gvif_full <- vif(model_ols)
print(round(gvif_full, 4))## GVIF Df GVIF^(1/(2*Df))
## Gender 1.1447 1 1.0699
## Age 36.0697 5 1.4312
## Education_Level 27.4979 2 2.2899
## Location 1.3698 1 1.1704
## Financial_Condition 1.2880 2 1.0653
## Load_shedding 1.2545 1 1.1201
## Network_Type 1.4492 2 1.0972
## Class_Duration 1.5466 2 1.1152
## Self_Lms 1.5296 1 1.2368
# Ekstrak kolom GVIF^(1/(2*Df)) — kolom ke-3
gvif_adj <- gvif_full[, 3]
cat("Nilai tertinggi :", round(max(gvif_adj), 4), "\n")## Nilai tertinggi : 2.2899
## Nilai terendah : 1.0653
## Threshold aman : < 2.2361 (setara VIF < 5)
## Threshold kritis: < 3.1623 (setara VIF < 10)
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)
model_ols_viz <- lm(
Y_num ~ Gender + Age + Education_Level + Location +
Financial_Condition + Load_shedding +
Network_Type + Class_Duration + Self_Lms,
data = ordinaldf
)
gvif_vals <- vif(model_ols_viz)
ordinaldf$Y_num <- NULL
gvif_adj_vals <- gvif_vals[, 3]
thr_warn <- sqrt(5) # ≈ 2.236
thr_crit <- sqrt(10) # ≈ 3.162
vif_df <- data.frame(
Variabel = rownames(gvif_vals),
GVIF_adj = as.numeric(gvif_adj_vals)
)
ggplot(vif_df, aes(x = reorder(Variabel, GVIF_adj),
y = GVIF_adj,
fill = GVIF_adj > thr_warn)) +
geom_col(show.legend = FALSE) +
geom_hline(yintercept = thr_warn, linetype = "dashed", color = "orange", linewidth = 0.8) +
geom_hline(yintercept = thr_crit, linetype = "dashed", color = "red", linewidth = 0.8) +
annotate("text", x = 0.6, y = thr_warn + 0.03,
label = paste0("\u221a5 = ", round(thr_warn, 3), " (peringatan)"),
color = "orange", hjust = 0, size = 3.5, vjust = 0) +
annotate("text", x = 0.6, y = thr_crit + 0.03,
label = paste0("\u221a10 = ", round(thr_crit, 3), " (kritis)"),
color = "red", hjust = 0, size = 3.5, vjust = 0) +
scale_fill_manual(values = c("FALSE" = "steelblue", "TRUE" = "tomato")) +
coord_flip() +
theme_minimal(base_size = 12) +
labs(title = "Generalized VIF — GVIF\u00b9\u141f\u00b2\u1d30\u1da0 per Variabel",
subtitle = "Nilai yang tepat untuk variabel kategorik (setara \u221aVIF pada numerik)",
x = "Variabel", y = "GVIF^(1/(2*Df))")Terpenuhi — Sseuruh variabel berada di bawah threshold kritis √10 ≈ 3.162. Nilai GVIF mentah untuk
AgedanEducation_Leveltampak besar karena memiliki banyak kategori, namun masih dibawah threshold, sehingga tidak terdapat multikolinearitas yang serius. Ini adalah asumsi wajib dan terkonfirmasi terpenuhi.
Outlier ekstrem dapat memengaruhi estimasi model secara signifikan. Karena semua variabel dalam model ini bersifat kategorik, deteksi outlier dilakukan menggunakan Cook’s Distance pada model OLS bantu, observasi dengan Cook’s Distance > 4/n dianggap berpengaruh besar.
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)
model_ols2 <- lm(
Y_num ~ Gender + Age + Education_Level + Location +
Financial_Condition + Load_shedding +
Network_Type + Class_Duration + Self_Lms,
data = ordinaldf
)
n <- nrow(ordinaldf)
cooksd <- cooks.distance(model_ols2)
threshold <- 4 / n
data.frame(
Threshold = round(threshold, 6),
N_Berpengaruh = sum(cooksd > threshold, na.rm = TRUE),
Pct_Berpengaruh = paste0(round(mean(cooksd > threshold, na.rm = TRUE) * 100, 2), "%"),
Cook_Max = round(max(cooksd, na.rm = TRUE), 6)
)| Threshold | N_Berpengaruh | Pct_Berpengaruh | Cook_Max |
|---|---|---|---|
| 0.00332 | 72 | 5.98% | 0.021356 |
ordinaldf$Y_num <- as.numeric(ordinaldf$Adaptivity_Level)
model_ols3 <- lm(
Y_num ~ Gender + Age + Education_Level + Location +
Financial_Condition + Load_shedding +
Network_Type + Class_Duration + Self_Lms,
data = ordinaldf
)
n3 <- nrow(ordinaldf)
cooksd3 <- cooks.distance(model_ols3)
thresh3 <- 4 / n3
ordinaldf$Y_num <- NULL
plot(cooksd3, type = "h",
main = "Cook's Distance untuk Deteksi Outlier / Influential Observation",
ylab = "Cook's Distance",
xlab = "Indeks Observasi",
col = ifelse(cooksd3 > thresh3, "tomato", "steelblue"))
abline(h = thresh3, col = "red", lwd = 2, lty = 2)
legend("topright",
legend = c("Normal", "Berpengaruh (> 4/n)", "Threshold (4/n)"),
col = c("steelblue", "tomato", "red"),
lty = c(1, 1, 2),
lwd = 2,
cex = 0.85)Perlu perhatian — Terdapat sejumlah observasi yang melampaui threshold Cook’s Distance (4/n). Hal ini wajar terjadi pada data kategorik dengan variasi nilai yang terbatas, karena observasi dalam kelompok kecil cenderung memiliki leverage lebih tinggi. Selama proporsinya tidak terlalu besar dan tidak ada satu titik pun yang nilainya sangat jauh melampaui yang lain, model masih dapat digunakan dan diinterpretasikan.
## Call:
## polr(formula = Adaptivity_Level ~ Gender + Age + Education_Level +
## Location + Financial_Condition + Load_shedding + Network_Type +
## Class_Duration + Self_Lms, data = ordinaldf, Hess = TRUE,
## method = "logistic")
##
## Coefficients:
## Value Std. Error t value
## GenderGirl -0.25837 0.1374 -1.8805
## Age6-10 0.71599 0.4554 1.5721
## Age11-15 0.04194 0.2854 0.1469
## Age16-20 -0.96627 0.4350 -2.2212
## Age21-25 -0.28343 0.5467 -0.5185
## Age26-30 0.15845 0.6200 0.2556
## Education_LevelCollege -0.30506 0.3882 -0.7857
## Education_LevelUniversity -0.41020 0.4778 -0.8586
## LocationYes 0.27542 0.1821 1.5122
## Financial_ConditionMid 0.41240 0.1794 2.2989
## Financial_ConditionRich 2.66027 0.3142 8.4662
## Load_sheddingHigh -0.08123 0.1943 -0.4182
## Network_Type3G 1.51917 0.6821 2.2272
## Network_Type4G 2.40784 0.6847 3.5166
## Class_Duration1-3 3.39972 0.3560 9.5506
## Class_Duration3-6 3.82514 0.3889 9.8347
## Self_LmsYes 1.31377 0.2084 6.3028
##
## Intercepts:
## Value Std. Error t value
## Low|Moderate 4.9778 0.8274 6.0162
## Moderate|High 8.6140 0.8465 10.1763
##
## Residual Deviance: 1722.046
## AIC: 1760.046
H₀: Semua koefisien prediktor = 0
H₁: Minimal satu koefisien ≠ 0
Tolak H₀ jika p-value < 0.05
LL_full <- as.numeric(logLik(model_polr))
LL_null <- as.numeric(logLik(model_null))
G2 <- -2 * (LL_null - LL_full)
df_lrt <- length(coef(model_polr))
p_lrt <- pchisq(G2, df = df_lrt, lower.tail = FALSE)
data.frame(
Statistik = round(G2, 4),
df = df_lrt,
p_value = format(p_lrt, scientific = TRUE, digits = 4),
Keputusan = ifelse(p_lrt < 0.05, "TOLAK H0", "GAGAL TOLAK H0"),
Kesimpulan = ifelse(p_lrt < 0.05,
"Model signifikan secara serentak",
"Model tidak signifikan secara serentak")
)| Statistik | df | p_value | Keputusan | Kesimpulan |
|---|---|---|---|---|
| 480.0021 | 17 | 3.069e-91 | TOLAK H0 | Model signifikan secara serentak |
H₀: Koefisien variabel ke-j = 0
H₁: Koefisien variabel ke-j ≠ 0
Tolak H₀ jika p-value < 0.05
coef_tbl <- coef(summary(model_polr))
z_vals <- coef_tbl[, "t value"]
p_vals <- 2 * pnorm(abs(z_vals), lower.tail = FALSE)
wald_result <- data.frame(
Variabel = rownames(coef_tbl),
Koefisien = round(coef_tbl[, "Value"], 4),
Std_Error = round(coef_tbl[, "Std. Error"], 4),
z_value = round(z_vals, 4),
p_value = round(p_vals, 6),
Sig = case_when(
p_vals < 0.001 ~ "***",
p_vals < 0.01 ~ "**",
p_vals < 0.05 ~ "*",
p_vals < 0.1 ~ ".",
TRUE ~ "ns"
)
)
wald_result| Variabel | Koefisien | Std_Error | z_value | p_value | Sig | |
|---|---|---|---|---|---|---|
| GenderGirl | GenderGirl | -0.2584 | 0.1374 | -1.8805 | 0.060039 | . |
| Age6-10 | Age6-10 | 0.7160 | 0.4554 | 1.5721 | 0.115920 | ns |
| Age11-15 | Age11-15 | 0.0419 | 0.2854 | 0.1469 | 0.883182 | ns |
| Age16-20 | Age16-20 | -0.9663 | 0.4350 | -2.2212 | 0.026340 | * |
| Age21-25 | Age21-25 | -0.2834 | 0.5467 | -0.5185 | 0.604138 | ns |
| Age26-30 | Age26-30 | 0.1585 | 0.6200 | 0.2556 | 0.798289 | ns |
| Education_LevelCollege | Education_LevelCollege | -0.3051 | 0.3882 | -0.7857 | 0.432015 | ns |
| Education_LevelUniversity | Education_LevelUniversity | -0.4102 | 0.4778 | -0.8586 | 0.390583 | ns |
| LocationYes | LocationYes | 0.2754 | 0.1821 | 1.5122 | 0.130484 | ns |
| Financial_ConditionMid | Financial_ConditionMid | 0.4124 | 0.1794 | 2.2989 | 0.021511 | * |
| Financial_ConditionRich | Financial_ConditionRich | 2.6603 | 0.3142 | 8.4662 | 0.000000 | *** |
| Load_sheddingHigh | Load_sheddingHigh | -0.0812 | 0.1943 | -0.4182 | 0.675830 | ns |
| Network_Type3G | Network_Type3G | 1.5192 | 0.6821 | 2.2272 | 0.025937 | * |
| Network_Type4G | Network_Type4G | 2.4078 | 0.6847 | 3.5166 | 0.000437 | *** |
| Class_Duration1-3 | Class_Duration1-3 | 3.3997 | 0.3560 | 9.5506 | 0.000000 | *** |
| Class_Duration3-6 | Class_Duration3-6 | 3.8251 | 0.3889 | 9.8347 | 0.000000 | *** |
| Self_LmsYes | Self_LmsYes | 1.3138 | 0.2084 | 6.3028 | 0.000000 | *** |
| Low|Moderate | Low|Moderate | 4.9778 | 0.8274 | 6.0162 | 0.000000 | *** |
| Moderate|High | Moderate|High | 8.6140 | 0.8465 | 10.1763 | 0.000000 | *** |
| McFadden R² | Interpretasi |
|---|---|
| 0.00 – 0.10 | Lemah |
| 0.10 – 0.20 | Cukup |
| 0.20 – 0.40 | Baik |
| > 0.40 | Sangat Baik |
n <- nrow(ordinaldf)
r2_mcfadden <- 1 - (LL_full / LL_null)
r2_cox <- 1 - exp((2 / n) * (LL_null - LL_full))
r2_nag <- r2_cox / (1 - exp((2 / n) * LL_null))
data.frame(
Metrik = c("McFadden Pseudo R²", "Cox & Snell R²", "Nagelkerke R²",
"AIC Full Model", "AIC Null Model", "Residual Deviance"),
Nilai = round(c(r2_mcfadden, r2_cox, r2_nag,
AIC(model_polr), AIC(model_null),
model_polr$deviance), 4)
)| Metrik | Nilai |
|---|---|
| McFadden Pseudo R² | 0.2180 |
| Cox & Snell R² | 0.3286 |
| Nagelkerke R² | 0.3915 |
| AIC Full Model | 1760.0456 |
| AIC Null Model | 2206.0477 |
| Residual Deviance | 1722.0456 |
ordinaldf$pred_class <- predict(model_polr, newdata = ordinaldf, type = "class")
head(data.frame(
Aktual = ordinaldf$Adaptivity_Level,
Prediksi = ordinaldf$pred_class
), 10)| Aktual | Prediksi |
|---|---|
| Moderate | Moderate |
| Moderate | Moderate |
| Moderate | Low |
| Moderate | Moderate |
| Low | Low |
| Low | Moderate |
| Low | Low |
| Moderate | Moderate |
| Low | Moderate |
| Moderate | Moderate |
## Low Moderate High
## 1 0.2228 0.6930 0.0842
## 2 0.1421 0.7206 0.1373
## 3 0.5030 0.4716 0.0254
## 4 0.2140 0.6978 0.0883
## 5 0.9880 0.0117 0.0003
## 6 0.4357 0.5313 0.0330
## 7 0.8630 0.1329 0.0042
## 8 0.1737 0.7149 0.1114
## 9 0.4387 0.5286 0.0326
## 10 0.3383 0.6127 0.0490
## Prediksi
## Aktual Low Moderate High
## Low 275 202 3
## Moderate 75 545 5
## High 0 84 16
## Akurasi Model: 69.38%
metrics <- do.call(rbind, lapply(rownames(cm), function(kelas) {
tp <- cm[kelas, kelas]
fp <- sum(cm[, kelas]) - tp
fn <- sum(cm[kelas, ]) - tp
prec <- ifelse((tp + fp) > 0, tp / (tp + fp), NA)
rec <- ifelse((tp + fn) > 0, tp / (tp + fn), NA)
data.frame(Kelas = kelas, Precision = round(prec, 3), Recall = round(rec, 3))
}))
print(metrics)## Kelas Precision Recall
## 1 Low 0.786 0.573
## 2 Moderate 0.656 0.872
## 3 High 0.667 0.160
cm_df <- as.data.frame(cm)
ggplot(cm_df, aes(x = Prediksi, y = Aktual, fill = Freq)) +
geom_tile(color = "white") +
geom_text(aes(label = Freq), size = 7, fontface = "bold") +
scale_fill_gradient(low = "#f0f7ff", high = "#2171b5") +
theme_minimal(base_size = 13) +
labs(title = "Confusion Matrix — Ordinal Logistic Regression",
x = "Prediksi", y = "Aktual", fill = "Jumlah")or_vals <- exp(coef(model_polr))
se_vals <- sqrt(diag(vcov(model_polr))[names(coef(model_polr))])
or_tbl <- data.frame(
Variabel = names(or_vals),
OR = round(or_vals, 4),
CI_Lower = round(exp(coef(model_polr) - 1.96 * se_vals), 4),
CI_Upper = round(exp(coef(model_polr) + 1.96 * se_vals), 4)
)
or_tbl| Variabel | OR | CI_Lower | CI_Upper | |
|---|---|---|---|---|
| GenderGirl | GenderGirl | 0.7723 | 0.5900 | 1.0110 |
| Age6-10 | Age6-10 | 2.0462 | 0.8381 | 4.9959 |
| Age11-15 | Age11-15 | 1.0428 | 0.5960 | 1.8246 |
| Age16-20 | Age16-20 | 0.3805 | 0.1622 | 0.8926 |
| Age21-25 | Age21-25 | 0.7532 | 0.2580 | 2.1991 |
| Age26-30 | Age26-30 | 1.1717 | 0.3476 | 3.9499 |
| Education_LevelCollege | Education_LevelCollege | 0.7371 | 0.3444 | 1.5776 |
| Education_LevelUniversity | Education_LevelUniversity | 0.6635 | 0.2601 | 1.6926 |
| LocationYes | LocationYes | 1.3171 | 0.9217 | 1.8821 |
| Financial_ConditionMid | Financial_ConditionMid | 1.5104 | 1.0627 | 2.1468 |
| Financial_ConditionRich | Financial_ConditionRich | 14.3002 | 7.7245 | 26.4737 |
| Load_sheddingHigh | Load_sheddingHigh | 0.9220 | 0.6300 | 1.3492 |
| Network_Type3G | Network_Type3G | 4.5684 | 1.1999 | 17.3936 |
| Network_Type4G | Network_Type4G | 11.1099 | 2.9031 | 42.5158 |
| Class_Duration1-3 | Class_Duration1-3 | 29.9557 | 14.9099 | 60.1847 |
| Class_Duration3-6 | Class_Duration3-6 | 45.8393 | 21.3876 | 98.2456 |
| Self_LmsYes | Self_LmsYes | 3.7202 | 2.4725 | 5.5975 |
ggplot(or_tbl, aes(x = OR, y = reorder(Variabel, OR))) +
geom_point(size = 3.5, color = "steelblue") +
geom_errorbarh(aes(xmin = CI_Lower, xmax = CI_Upper), height = 0.25, color = "steelblue") +
geom_vline(xintercept = 1, linetype = "dashed", color = "red", linewidth = 0.8) +
theme_minimal(base_size = 12) +
labs(title = "Odds Ratio dengan 95% Confidence Interval",
subtitle = "Garis merah = OR 1 (tidak ada efek)",
x = "Odds Ratio", y = "Variabel")# Buat grid nilai Class_Duration
grid <- data.frame(
Class_Duration = factor(c("0", "1-3", "3-6"), levels = c("0", "1-3", "3-6")),
Gender = factor("Boy", levels = c("Boy", "Girl")),
Age = factor("21-25", levels = c("1-5", "6-10", "11-15", "16-20", "21-25", "26-30")),
Education_Level = factor("University", levels = c("School", "College", "University")),
Location = factor("Yes", levels = c("No", "Yes")),
Financial_Condition = factor("Mid", levels = c("Poor", "Mid", "Rich")),
Load_shedding = factor("Low", levels = c("Low", "High")),
Network_Type = factor("4G", levels = c("2G", "3G", "4G")),
Self_Lms = factor("No", levels = c("No", "Yes"))
)
# Prediksi probabilitas
prob <- predict(model_polr, newdata = grid, type = "probs")
# Gabungkan ke data
prob_df <- cbind(grid, prob)
# Ubah ke long format
prob_long <- prob_df %>%
pivot_longer(cols = c("Low", "Moderate", "High"),
names_to = "Kategori",
values_to = "Probabilitas")
# Plot
ggplot(prob_long, aes(x = Class_Duration, y = Probabilitas, fill = Kategori)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal(base_size = 13) +
labs(
title = "Pengaruh Durasi Kelas terhadap Probabilitas Tingkat Adaptasi",
x = "Class Duration (jam/hari)",
y = "Probabilitas",
fill = "Kategori"
)## Buat grid nilai Class_Duration
grid <- data.frame(
Class_Duration = factor(c("0", "1-3", "3-6"),
levels = c("0", "1-3", "3-6")),
Gender = factor("Boy", levels = c("Boy", "Girl")),
Age = factor("21-25", levels = levels(ordinaldf$Age)),
Education_Level = factor("University", levels = levels(ordinaldf$Education_Level)),
Location = factor("Yes", levels = levels(ordinaldf$Location)),
Financial_Condition = factor("Mid", levels = levels(ordinaldf$Financial_Condition)),
Load_shedding = factor("Low", levels = levels(ordinaldf$Load_shedding)),
Network_Type = factor("4G", levels = levels(ordinaldf$Network_Type)),
Self_Lms = factor("No", levels = levels(ordinaldf$Self_Lms))
)
## Prediksi probabilitas
prob <- predict(model_polr, newdata = grid, type = "probs")
## Gabungkan
prob_df <- cbind(grid, prob)
## Long format
library(tidyr)
prob_long <- prob_df %>%
pivot_longer(cols = c("Low", "Moderate", "High"),
names_to = "Kategori",
values_to = "Probabilitas")
## Plot garis (INI YANG MELENGKUNG)
library(ggplot2)
ggplot(prob_long, aes(x = Class_Duration,
y = Probabilitas,
group = Kategori,
color = Kategori)) +
geom_line(size = 1.5) +
geom_point(size = 3) +
theme_minimal(base_size = 13) +
labs(
title = "Kurva Probabilitas Ordinal Logistic Regression",
subtitle = "Pengaruh Class Duration terhadap Adaptivity Level",
x = "Class Duration",
y = "Probabilitas",
color = "Kategori"
)