#Ordinal Logistic Regression
# Install the package
#install.packages("ucimlrepo")
# Load the package
library(ucimlrepo)
## Warning: package 'ucimlrepo' was built under R version 4.4.1
library(MASS)
# URL dataset Car Evaluation
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data"
# Baca langsung dari URL
car_data <- read.csv(url, header = FALSE, stringsAsFactors = FALSE)
# Simpan lokal sebagai RDS
saveRDS(car_data, "car_data_local.rds")
# Load kembali dari file RDS
car_data <- readRDS("car_data_local.rds")
# Beri nama kolom sesuai dokumentasi
colnames(car_data) <- c("buying","maint","doors","persons","lug_boot","safety","class")
# Cek 6 baris pertama
head(car_data)
## buying maint doors persons lug_boot safety class
## 1 vhigh vhigh 2 2 small low unacc
## 2 vhigh vhigh 2 2 small med unacc
## 3 vhigh vhigh 2 2 small high unacc
## 4 vhigh vhigh 2 2 med low unacc
## 5 vhigh vhigh 2 2 med med unacc
## 6 vhigh vhigh 2 2 med high unacc
# Ubah target menjadi faktor ordinal
car_data$class <- factor(car_data$class,
levels=c("unacc","acc","good","vgood"),
ordered=TRUE)
problem_vars <- names(which(sapply(car_data, function(x) length(unique(x)) < 2)))
problem_vars
## character(0)
head(car_data)
## buying maint doors persons lug_boot safety class
## 1 vhigh vhigh 2 2 small low unacc
## 2 vhigh vhigh 2 2 small med unacc
## 3 vhigh vhigh 2 2 small high unacc
## 4 vhigh vhigh 2 2 med low unacc
## 5 vhigh vhigh 2 2 med med unacc
## 6 vhigh vhigh 2 2 med high unacc
# Define the X variables
car_data$persons <- factor(car_data$persons)
car_data$lug_boot <- factor(car_data$lug_boot)
car_data$safety <- factor(car_data$safety)
levels(car_data$persons)
## [1] "2" "4" "more"
levels(car_data$lug_boot)
## [1] "big" "med" "small"
levels(car_data$safety)
## [1] "high" "low" "med"
#Y variable
levels(car_data$class)
## [1] "unacc" "acc" "good" "vgood"
# Ordinal logistic regression
model_ord <- polr(class ~ persons + lug_boot + safety, data = car_data, Hess=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#Pengechekan asumsi
library(car)
## Loading required package: carData
vif(model_ord)
## GVIF Df GVIF^(1/(2*Df))
## persons 214.6030 2 3.827445
## lug_boot 233.9319 2 3.910861
## safety 1311.1975 2 6.017513
# Estimation
summary(model_ord)
## Call:
## polr(formula = class ~ persons + lug_boot + safety, data = car_data,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## persons4 19.0903 9.076e-02 2.103e+02
## personsmore 19.0265 9.033e-02 2.106e+02
## lug_bootmed -0.5284 1.735e-01 -3.046e+00
## lug_bootsmall -1.6077 1.813e-01 -8.869e+00
## safetylow -18.9287 6.999e-08 -2.704e+08
## safetymed -1.1948 1.465e-01 -8.153e+00
##
## Intercepts:
## Value Std. Error t value
## unacc|acc 1.688340e+01 1.132000e-01 1.491291e+02
## acc|good 1.946780e+01 1.148000e-01 1.695531e+02
## good|vgood 2.035750e+01 1.429000e-01 1.424435e+02
##
## Residual Deviance: 1601.804
## AIC: 1619.804
coefs <- coef(model_ord)
# Generate the p-value
(ctable <- coef(summary(model_ord)))
## Value Std. Error t value
## persons4 19.0902790 9.075911e-02 2.103401e+02
## personsmore 19.0265276 9.032734e-02 2.106397e+02
## lug_bootmed -0.5284298 1.734979e-01 -3.045741e+00
## lug_bootsmall -1.6077016 1.812786e-01 -8.868679e+00
## safetylow -18.9286951 6.999114e-08 -2.704441e+08
## safetymed -1.1948360 1.465493e-01 -8.153136e+00
## unacc|acc 16.8834066 1.132133e-01 1.491291e+02
## acc|good 19.4677563 1.148181e-01 1.695531e+02
## good|vgood 20.3574631 1.429161e-01 1.424435e+02
p_values <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
coefs <- cbind(coefs, "p value" = p_values)
## Warning in cbind(coefs, `p value` = p_values): number of rows of result is not
## a multiple of vector length (arg 1)
coefs
## coefs p value
## persons4 19.0902790 0.000000e+00
## personsmore 19.0265276 0.000000e+00
## lug_bootmed -0.5284298 2.321074e-03
## lug_bootsmall -1.6077016 7.401873e-19
## safetylow -18.9286951 0.000000e+00
## safetymed -1.1948360 3.546076e-16
## unacc|acc 19.0902790 0.000000e+00
## acc|good 19.0265276 0.000000e+00
## good|vgood -0.5284298 0.000000e+00
#Generate the Odds Ratio
OR <- exp(coef(model_ord))
OR
## persons4 personsmore lug_bootmed lug_bootsmall safetylow
## 1.953452e+08 1.832804e+08 5.895299e-01 2.003476e-01 6.016892e-09
## safetymed
## 3.027536e-01
#Uji serentak
# Model null (tanpa prediktor)
model_null <- polr(class ~ 1, data = car_data, Hess = TRUE)
# Likelihood ratio test
anova(model_null, model_ord)
## Likelihood ratio tests of ordinal regression models
##
## Response: class
## Model Resid. df Resid. Dev Test Df LR stat.
## 1 1 1725 2888.373
## 2 persons + lug_boot + safety 1719 1601.804 1 vs 2 6 1286.569
## Pr(Chi)
## 1
## 2 0
loglik_full <- logLik(model_ord)
loglik_null <- logLik(model_null)
# Perform likelihood ratio test
G <- -2*(loglik_null-loglik_full)
G
## 'log Lik.' 1286.569 (df=3)
# Derajat bebas
df_lr <- attr(loglik_full, "df") - attr(loglik_null, "df")
# p-value
p_lr <- 1 - pchisq(G, df_lr)
p_lr
## 'log Lik.' 0 (df=3)
#partial test
# Tabel koefisien
(ctable <- coef(summary(model_ord)))
## Value Std. Error t value
## persons4 19.0902790 9.075911e-02 2.103401e+02
## personsmore 19.0265276 9.032734e-02 2.106397e+02
## lug_bootmed -0.5284298 1.734979e-01 -3.045741e+00
## lug_bootsmall -1.6077016 1.812786e-01 -8.868679e+00
## safetylow -18.9286951 6.999114e-08 -2.704441e+08
## safetymed -1.1948360 1.465493e-01 -8.153136e+00
## unacc|acc 16.8834066 1.132133e-01 1.491291e+02
## acc|good 19.4677563 1.148181e-01 1.695531e+02
## good|vgood 20.3574631 1.429161e-01 1.424435e+02
# p-value untuk setiap koefisien (Wald test)
p_values <- 2 * (1 - pnorm(abs(ctable[, "t value"])))
p_values
## persons4 personsmore lug_bootmed lug_bootsmall safetylow
## 0.000000e+00 0.000000e+00 2.321074e-03 0.000000e+00 0.000000e+00
## safetymed unacc|acc acc|good good|vgood
## 4.440892e-16 0.000000e+00 0.000000e+00 0.000000e+00
# Goodness of Fit using Deviance test
# Extract model deviance
deviance_value <- model_ord$deviance
# Extract residual degrees of freedom
df_residual <- model_ord$df.residual
# p-value for GOF
gof_p_value <- 1 - pchisq(deviance_value, df_residual)
deviance_value
## [1] 1601.804
df_residual
## [1] 1719
gof_p_value
## [1] 0.9790565
if(gof_p_value > 0.05){
print("Model memiliki Goodness of Fit yang baik (tidak ada penolakan H0).")
} else {
print("Model tidak fit. Ada bukti model kurang sesuai dengan data.")
}
## [1] "Model memiliki Goodness of Fit yang baik (tidak ada penolakan H0)."
#Koef Determinasi
LL_full <- as.numeric(logLik(model_ord))
LL_null <- as.numeric(logLik(model_null))
n <- nrow(car_data)
R2_cox <- 1 - exp((LL_null - LL_full) * 2 / n)
R2_nagel <- R2_cox / (1 - exp(2 * LL_null / n))
R2_nagel
## [1] 0.6465814
#APER
pred_class <- predict(model_ord, type = "class")
pred_class <- factor(pred_class,
levels = levels(car_data$class),
ordered = TRUE)
tab <- table(Predicted = pred_class, Actual = car_data$class)
tab
## Actual
## Predicted unacc acc good vgood
## unacc 1053 35 0 0
## acc 157 349 69 65
## good 0 0 0 0
## vgood 0 0 0 0
misclass <- sum(pred_class != car_data$class)
N <- nrow(car_data)
APER <- misclass / N
APER
## [1] 0.1886574
Accuracy <- 1 - APER
Accuracy
## [1] 0.8113426
library(brant)
# Pastikan model ordinal logistic regression (polr) sudah dibuat
# Contoh model:
library(MASS)
model_polr <- polr(class ~ persons + lug_boot + safety,
data = car_data, Hess = TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Melakukan uji proportional odds
brant_test <- brant(model_polr)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## --------------------------------------------
## Test for X2 df probability
## --------------------------------------------
## Omnibus 3.57 12 0.99
## persons4 0 2 1
## personsmore 0 2 1
## lug_bootmed 2.13 2 0.34
## lug_bootsmall 0.56 2 0.75
## safetylow 0 2 1
## safetymed 0.02 2 0.99
## --------------------------------------------
##
## H0: Parallel Regression Assumption holds
## Warning in brant(model_polr): 57 combinations in table(dv,ivs) do not occur.
## Because of that, the test results might be invalid.
# Menampilkan hasil uji
brant_test
## X2 df probability
## Omnibus 3.565795e+00 12 0.9900604
## persons4 -5.035174e-06 2 1.0000000
## personsmore -4.550534e-06 2 1.0000000
## lug_bootmed 2.134405e+00 2 0.3439694
## lug_bootsmall 5.634445e-01 2 0.7544832
## safetylow -4.836520e-06 2 1.0000000
## safetymed 2.004418e-02 2 0.9900280