Tugas 2 ADK

#Ordinal Logistic Regression
# Install the package
#install.packages("ucimlrepo")

# Load the package
library(ucimlrepo)

## Warning: package 'ucimlrepo' was built under R version 4.4.1

library(MASS)


# URL dataset Car Evaluation
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data"

# Baca langsung dari URL
car_data <- read.csv(url, header = FALSE, stringsAsFactors = FALSE)

# Simpan lokal sebagai RDS
saveRDS(car_data, "car_data_local.rds")

# Load kembali dari file RDS
car_data <- readRDS("car_data_local.rds")

# Beri nama kolom sesuai dokumentasi
colnames(car_data) <- c("buying","maint","doors","persons","lug_boot","safety","class")

# Cek 6 baris pertama
head(car_data)

##   buying maint doors persons lug_boot safety class
## 1  vhigh vhigh     2       2    small    low unacc
## 2  vhigh vhigh     2       2    small    med unacc
## 3  vhigh vhigh     2       2    small   high unacc
## 4  vhigh vhigh     2       2      med    low unacc
## 5  vhigh vhigh     2       2      med    med unacc
## 6  vhigh vhigh     2       2      med   high unacc

# Ubah target menjadi faktor ordinal
car_data$class <- factor(car_data$class, 
                         levels=c("unacc","acc","good","vgood"), 
                         ordered=TRUE)


problem_vars <- names(which(sapply(car_data, function(x) length(unique(x)) < 2)))
problem_vars

## character(0)

head(car_data)

##   buying maint doors persons lug_boot safety class
## 1  vhigh vhigh     2       2    small    low unacc
## 2  vhigh vhigh     2       2    small    med unacc
## 3  vhigh vhigh     2       2    small   high unacc
## 4  vhigh vhigh     2       2      med    low unacc
## 5  vhigh vhigh     2       2      med    med unacc
## 6  vhigh vhigh     2       2      med   high unacc

# Define the X variables
car_data$persons <- factor(car_data$persons)
car_data$lug_boot <- factor(car_data$lug_boot)
car_data$safety <- factor(car_data$safety)
levels(car_data$persons)

## [1] "2"    "4"    "more"

levels(car_data$lug_boot)

## [1] "big"   "med"   "small"

levels(car_data$safety)

## [1] "high" "low"  "med"

#Y variable
levels(car_data$class)

## [1] "unacc" "acc"   "good"  "vgood"

# Ordinal logistic regression
model_ord <- polr(class ~ persons + lug_boot + safety, data = car_data, Hess=TRUE)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

#Pengechekan asumsi
library(car)

## Loading required package: carData

vif(model_ord)

##               GVIF Df GVIF^(1/(2*Df))
## persons   214.6030  2        3.827445
## lug_boot  233.9319  2        3.910861
## safety   1311.1975  2        6.017513

# Estimation
summary(model_ord)

## Call:
## polr(formula = class ~ persons + lug_boot + safety, data = car_data, 
##     Hess = TRUE)
## 
## Coefficients:
##                  Value Std. Error    t value
## persons4       19.0903  9.076e-02  2.103e+02
## personsmore    19.0265  9.033e-02  2.106e+02
## lug_bootmed    -0.5284  1.735e-01 -3.046e+00
## lug_bootsmall  -1.6077  1.813e-01 -8.869e+00
## safetylow     -18.9287  6.999e-08 -2.704e+08
## safetymed      -1.1948  1.465e-01 -8.153e+00
## 
## Intercepts:
##            Value         Std. Error    t value      
## unacc|acc   1.688340e+01  1.132000e-01  1.491291e+02
## acc|good    1.946780e+01  1.148000e-01  1.695531e+02
## good|vgood  2.035750e+01  1.429000e-01  1.424435e+02
## 
## Residual Deviance: 1601.804 
## AIC: 1619.804

coefs <- coef(model_ord)

# Generate the p-value
(ctable <- coef(summary(model_ord)))

##                     Value   Std. Error       t value
## persons4       19.0902790 9.075911e-02  2.103401e+02
## personsmore    19.0265276 9.032734e-02  2.106397e+02
## lug_bootmed    -0.5284298 1.734979e-01 -3.045741e+00
## lug_bootsmall  -1.6077016 1.812786e-01 -8.868679e+00
## safetylow     -18.9286951 6.999114e-08 -2.704441e+08
## safetymed      -1.1948360 1.465493e-01 -8.153136e+00
## unacc|acc      16.8834066 1.132133e-01  1.491291e+02
## acc|good       19.4677563 1.148181e-01  1.695531e+02
## good|vgood     20.3574631 1.429161e-01  1.424435e+02

p_values <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
coefs <- cbind(coefs, "p value" = p_values)

## Warning in cbind(coefs, `p value` = p_values): number of rows of result is not
## a multiple of vector length (arg 1)

coefs

##                     coefs      p value
## persons4       19.0902790 0.000000e+00
## personsmore    19.0265276 0.000000e+00
## lug_bootmed    -0.5284298 2.321074e-03
## lug_bootsmall  -1.6077016 7.401873e-19
## safetylow     -18.9286951 0.000000e+00
## safetymed      -1.1948360 3.546076e-16
## unacc|acc      19.0902790 0.000000e+00
## acc|good       19.0265276 0.000000e+00
## good|vgood     -0.5284298 0.000000e+00

#Generate the Odds Ratio
OR <- exp(coef(model_ord))
OR

##      persons4   personsmore   lug_bootmed lug_bootsmall     safetylow 
##  1.953452e+08  1.832804e+08  5.895299e-01  2.003476e-01  6.016892e-09 
##     safetymed 
##  3.027536e-01

#Uji serentak
# Model null (tanpa prediktor)
model_null <- polr(class ~ 1, data = car_data, Hess = TRUE)

# Likelihood ratio test
anova(model_null, model_ord)

## Likelihood ratio tests of ordinal regression models
## 
## Response: class
##                         Model Resid. df Resid. Dev   Test    Df LR stat.
## 1                           1      1725   2888.373                      
## 2 persons + lug_boot + safety      1719   1601.804 1 vs 2     6 1286.569
##   Pr(Chi)
## 1        
## 2       0

loglik_full <- logLik(model_ord)
loglik_null <- logLik(model_null)

# Perform likelihood ratio test
G <- -2*(loglik_null-loglik_full)
G

## 'log Lik.' 1286.569 (df=3)

# Derajat bebas
df_lr <- attr(loglik_full, "df") - attr(loglik_null, "df")

# p-value
p_lr <- 1 - pchisq(G, df_lr)
p_lr

## 'log Lik.' 0 (df=3)

#partial test
# Tabel koefisien
(ctable <- coef(summary(model_ord)))

##                     Value   Std. Error       t value
## persons4       19.0902790 9.075911e-02  2.103401e+02
## personsmore    19.0265276 9.032734e-02  2.106397e+02
## lug_bootmed    -0.5284298 1.734979e-01 -3.045741e+00
## lug_bootsmall  -1.6077016 1.812786e-01 -8.868679e+00
## safetylow     -18.9286951 6.999114e-08 -2.704441e+08
## safetymed      -1.1948360 1.465493e-01 -8.153136e+00
## unacc|acc      16.8834066 1.132133e-01  1.491291e+02
## acc|good       19.4677563 1.148181e-01  1.695531e+02
## good|vgood     20.3574631 1.429161e-01  1.424435e+02

# p-value untuk setiap koefisien (Wald test)
p_values <- 2 * (1 - pnorm(abs(ctable[, "t value"])))
p_values

##      persons4   personsmore   lug_bootmed lug_bootsmall     safetylow 
##  0.000000e+00  0.000000e+00  2.321074e-03  0.000000e+00  0.000000e+00 
##     safetymed     unacc|acc      acc|good    good|vgood 
##  4.440892e-16  0.000000e+00  0.000000e+00  0.000000e+00

# Goodness of Fit using Deviance test

# Extract model deviance
deviance_value <- model_ord$deviance

# Extract residual degrees of freedom
df_residual <- model_ord$df.residual

# p-value for GOF
gof_p_value <- 1 - pchisq(deviance_value, df_residual)

deviance_value

## [1] 1601.804

df_residual

## [1] 1719

gof_p_value

## [1] 0.9790565

if(gof_p_value > 0.05){
  print("Model memiliki Goodness of Fit yang baik (tidak ada penolakan H0).")
} else {
  print("Model tidak fit. Ada bukti model kurang sesuai dengan data.")
}

## [1] "Model memiliki Goodness of Fit yang baik (tidak ada penolakan H0)."

#Koef Determinasi
LL_full <- as.numeric(logLik(model_ord))
LL_null <- as.numeric(logLik(model_null))
n <- nrow(car_data)

R2_cox <- 1 - exp((LL_null - LL_full) * 2 / n)
R2_nagel <- R2_cox / (1 - exp(2 * LL_null / n))

R2_nagel

## [1] 0.6465814

#APER
pred_class <- predict(model_ord, type = "class")
pred_class <- factor(pred_class,
                     levels = levels(car_data$class),
                     ordered = TRUE)
tab <- table(Predicted = pred_class, Actual = car_data$class)
tab

##          Actual
## Predicted unacc  acc good vgood
##     unacc  1053   35    0     0
##     acc     157  349   69    65
##     good      0    0    0     0
##     vgood     0    0    0     0

misclass <- sum(pred_class != car_data$class)
N <- nrow(car_data)
APER <- misclass / N
APER

## [1] 0.1886574

Accuracy <- 1 - APER
Accuracy

## [1] 0.8113426

library(brant)

# Pastikan model ordinal logistic regression (polr) sudah dibuat
# Contoh model:
library(MASS)
model_polr <- polr(class ~ persons + lug_boot + safety,
                   data = car_data, Hess = TRUE)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Melakukan uji proportional odds
brant_test <- brant(model_polr)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## -------------------------------------------- 
## Test for X2  df  probability 
## -------------------------------------------- 
## Omnibus      3.57    12  0.99
## persons4 0   2   1
## personsmore  0   2   1
## lug_bootmed  2.13    2   0.34
## lug_bootsmall    0.56    2   0.75
## safetylow    0   2   1
## safetymed    0.02    2   0.99
## -------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds

## Warning in brant(model_polr): 57 combinations in table(dv,ivs) do not occur.
## Because of that, the test results might be invalid.

# Menampilkan hasil uji
brant_test

##                          X2 df probability
## Omnibus        3.565795e+00 12   0.9900604
## persons4      -5.035174e-06  2   1.0000000
## personsmore   -4.550534e-06  2   1.0000000
## lug_bootmed    2.134405e+00  2   0.3439694
## lug_bootsmall  5.634445e-01  2   0.7544832
## safetylow     -4.836520e-06  2   1.0000000
## safetymed      2.004418e-02  2   0.9900280

Tugas 2 ADK

Aida

2025-11-11