R Notebook

library(readxl)
# Load the student-performance workbook from a fixed local path.
# The "New names" message in the rendered output shows that columns 37 and 38
# have empty headers and are renamed to `...37` and `...38` on import.
data <- readxl::read_excel(
  path = "C:/Users/tengk/Downloads/student-mat (1).xlsx"
)

## New names:
## • `` -> `...37`
## • `` -> `...38`

#View(data)
#data.frame(data)

# Response: recode Y as an ordered factor with levels 1 < 2 < 3 < 4 < 5 and
# store it back in `data`. Any value of Y outside 1..5 becomes NA.
data$Y <- ordered(data$Y, levels = c(1, 2, 3, 4, 5))
# Free-standing copy of the response, used as `y` in the model formulas.
y <- data$Y

# Predictors, extracted from `data` as free-standing vectors.
# The fitted coefficients are labelled x3yes / x4yes / x5yes, so x3-x5 are
# yes/no categorical variables.
# NOTE(review): the models are fitted on these global vectors, not on columns
# of `data`; passing a different data frame as `data`/`newdata` will presumably
# not change the predictor values used - confirm before predicting on new data.
x1 <- data$studytime
x2 <- data$absences
x3 <- data$higher
x4 <- data$internet
x5 <- data$schoolsup

Model Rekap

# Fit the ordinal logistic regression model for the 5-category response
library(MASS)
model_ordinal_5cat <- polr(
  formula = y ~ x1 + x2 + x3 + x4 + x5,
  data = data,
  Hess = TRUE # keep the Hessian: needed for standard errors and CIs
)
print(model_ordinal_5cat)

## Call:
## polr(formula = y ~ x1 + x2 + x3 + x4 + x5, data = data, Hess = TRUE)
## 
## Coefficients:
##          x1          x2       x3yes       x4yes       x5yes 
## -0.15220094  0.02880453 -1.50074926 -0.57726094  0.92732001 
## 
## Intercepts:
##       1|2       2|3       3|4       4|5 
## -3.494704 -2.664383 -1.792155 -1.160777 
## 
## Residual Deviance: 1193.268 
## AIC: 1211.268

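Uji Serentak dan Parsial (Signifikansi). Uji Serentak (Overall/Simultaneous Test): Uji ini menilai apakah model secara keseluruhan lebih baik daripada model nol (intercept-only). Statistik ujinya adalah selisih Residual Deviance antara model nol dan model lengkap, $G^2 = D_{\text{nol}} - D_{\text{model}}$, yang dibandingkan dengan distribusi Chi-square dengan derajat bebas sebanyak jumlah koefisien prediktor dalam model (di sini 5).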

# Likelihood ratio test (simultaneous / overall test):
# compares the fitted model with the intercept-only (null) model.
# The null model is fitted to `y`, the same response object used by
# model_ordinal_5cat, so both rows of the anova() table refer to one response.
model_null <- polr(y ~ 1, data = data, Hess = TRUE)
anova(model_null, model_ordinal_5cat, test = "Chisq")

# Interpretation: a small p-value (e.g. < 0.05) indicates that the predictors
# are jointly significant.

Uji Parsial (Partial Test): Signifikansi setiap prediktor dilihat dari nilai t (atau Wald Z) dan p-value-nya.

# Model summary with the Wald statistics used for the partial tests
print(summary(model_ordinal_5cat))

## Call:
## polr(formula = y ~ x1 + x2 + x3 + x4 + x5, data = data, Hess = TRUE)
## 
## Coefficients:
##         Value Std. Error t value
## x1    -0.1522    0.11099  -1.371
## x2     0.0288    0.01313   2.195
## x3yes -1.5007    0.47921  -3.132
## x4yes -0.5773    0.24502  -2.356
## x5yes  0.9273    0.27274   3.400
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2 -3.4947  0.5470    -6.3885
## 2|3 -2.6644  0.5373    -4.9587
## 3|4 -1.7922  0.5290    -3.3875
## 4|5 -1.1608  0.5248    -2.2120
## 
## Residual Deviance: 1193.268 
## AIC: 1211.268

# Coefficient table (Value, Std. Error, t value) from which the Wald
# p-values are derived.
ctable <- coef(summary(model_ordinal_5cat))
print(ctable)

##             Value Std. Error   t value
## x1    -0.15220094 0.11099066 -1.371295
## x2     0.02880453 0.01312543  2.194558
## x3yes -1.50074926 0.47921398 -3.131689
## x4yes -0.57726094 0.24502327 -2.355943
## x5yes  0.92732001 0.27274091  3.400003
## 1|2   -3.49470445 0.54703317 -6.388469
## 2|3   -2.66438286 0.53731898 -4.958661
## 3|4   -1.79215465 0.52904306 -3.387540
## 4|5   -1.16077662 0.52476155 -2.212008

# Two-sided p-values, treating each "t value" as a standard normal statistic
p_values <- 2 * pnorm(abs(ctable[, "t value"]), lower.tail = FALSE)
print(cbind(ctable, "p-value" = p_values))

##             Value Std. Error   t value      p-value
## x1    -0.15220094 0.11099066 -1.371295 1.702830e-01
## x2     0.02880453 0.01312543  2.194558 2.819533e-02
## x3yes -1.50074926 0.47921398 -3.131689 1.738037e-03
## x4yes -0.57726094 0.24502327 -2.355943 1.847573e-02
## x5yes  0.92732001 0.27274091  3.400003 6.738504e-04
## 1|2   -3.49470445 0.54703317 -6.388469 1.675548e-10
## 2|3   -2.66438286 0.53731898 -4.958661 7.098058e-07
## 3|4   -1.79215465 0.52904306 -3.387540 7.052238e-04
## 4|5   -1.16077662 0.52476155 -2.212008 2.696613e-02

# Interpretasi: Nilai p-value < 0.05 menunjukkan prediktor signifikan secara parsial.

1. Pseudo $R^2$ (Daya Prediktif): Pseudo $R^2$ mengukur seberapa baik variabel independen menjelaskan variasi pada variabel dependen.

# install.packages("pscl")
library(pscl)

## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.

# Pseudo R-squared measures; the printed vector is labelled
# llh, llhNull, G2, McFadden, r2ML and r2CU.
print(pR2(model_ordinal_5cat))

## fitting null model for pseudo-r2

##           llh       llhNull            G2      McFadden          r2ML 
## -596.63414957 -614.92818162   36.58806411    0.02974987    0.08846748 
##          r2CU 
##    0.09258205

# Interpretasi: Fokus pada nilai Nagelkerke (tercetak sebagai r2CU, Cragg-Uhler) atau McFadden.
# Nilai yang semakin mendekati 1 menunjukkan daya prediktif yang lebih baik.

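Analisis Kesesuaian Model (Goodness-of-Fit): Uji kesesuaian model menilai apakah struktur model yang dihipotesiskan sesuai dengan data yang diamati. Fungsi nominal_test() dari paket ordinal yang dipakai di bawah ini secara khusus menguji asumsi proportional odds untuk tiap prediktor.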

library(ordinal)
# Refit the same specification as a cumulative link model so that
# nominal_test() can be applied. The response is `y`, the same object used
# for model_ordinal_5cat.
# nominal_test() runs likelihood ratio tests of the proportional-odds
# (parallel lines) assumption, one predictor at a time; it is not an overall
# goodness-of-fit test.
model <- clm(y ~ x1 + x2 + x3 + x4 + x5, data = data)
nominal_test(model)

# A p-value > 0.05 for a predictor means there is no evidence that its effect
# differs across thresholds, i.e. proportional odds is not rejected for it.

Ketepatan Klasifikasi (Classification Accuracy): Ketepatan klasifikasi dihitung menggunakan Matriks Kebingungan (Confusion Matrix) dengan membandingkan kategori nilai aktual dan kategori nilai yang diprediksi model.

# install.packages("caret")
library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

# Predicted response category for each observation
predicted_categories <- predict(
  model_ordinal_5cat,
  newdata = data,
  type = "class"
)

# Confusion matrix: `data` receives the predicted classes and `reference`
# the observed classes.
confusionMatrix(
  data = predicted_categories,
  reference = data$Y
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3   4   5
##          1  33  16  14  10  26
##          2   0   0   0   0   0
##          3   0   0   0   0   0
##          4   0   0   0   0   0
##          5  40  42  64  46 104
## 
## Overall Statistics
##                                           
##                Accuracy : 0.3468          
##                  95% CI : (0.2999, 0.3961)
##     No Information Rate : 0.3291          
##     P-Value [Acc > NIR] : 0.2422          
##                                           
##                   Kappa : 0.0762          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity           0.45205   0.0000   0.0000   0.0000   0.8000
## Specificity           0.79503   1.0000   1.0000   1.0000   0.2755
## Pos Pred Value        0.33333      NaN      NaN      NaN   0.3514
## Neg Pred Value        0.86486   0.8532   0.8025   0.8582   0.7374
## Prevalence            0.18481   0.1468   0.1975   0.1418   0.3291
## Detection Rate        0.08354   0.0000   0.0000   0.0000   0.2633
## Detection Prevalence  0.25063   0.0000   0.0000   0.0000   0.7494
## Balanced Accuracy     0.62354   0.5000   0.5000   0.5000   0.5377

# Interpretasi: Lihat 'Accuracy' dan 'Kappa' (Kappa > 0 menunjukkan klasifikasi lebih baik dari kebetulan).

Rasio Peluang (Odds Ratio - OR): OR menunjukkan faktor perubahan odds untuk berada di kategori nilai yang lebih tinggi untuk setiap peningkatan 1 unit pada prediktor x.

# Odds ratios: exponentiated slope coefficients of the fitted model
odds_ratio <- exp(stats::coef(model_ordinal_5cat))
print("Odds Ratio (OR) untuk Setiap Prediktor:")

## [1] "Odds Ratio (OR) untuk Setiap Prediktor:"

print(odds_ratio)

##        x1        x2     x3yes     x4yes     x5yes 
## 0.8588157 1.0292234 0.2229630 0.5614341 2.5277258

# 95% confidence intervals for the odds ratios (exponentiated profile CIs)
odds_ci <- exp(confint(model_ordinal_5cat, level = 0.95))

## Waiting for profiling to be done...

print("Interval Kepercayaan (CI 95%) untuk OR:")

## [1] "Interval Kepercayaan (CI 95%) untuk OR:"

print(odds_ci)

##            2.5 %    97.5 %
## x1    0.69052943 1.0675315
## x2    1.00399023 1.0571620
## x3yes 0.08182462 0.5490495
## x4yes 0.34561017 0.9046956
## x5yes 1.49051900 4.3552997

# Interpretasi:
# OR > 1: Meningkatkan peluang untuk berada di kategori nilai yang LEBIH TINGGI.
# OR < 1: Menurunkan peluang untuk berada di kategori nilai yang LEBIH TINGGI (efek negatif).
# OR yang CI-nya tidak mencakup 1.0 dianggap signifikan.

# One-time package installation
# install.packages("MASS")
# install.packages("brant")

# Load the packages
library(MASS)
library(brant)

# Brant test of the parallel regression assumption for the fitted model.
# brant() prints its result table when called; the returned value is kept
# in `uji_brant`.
uji_brant <- brant(model_ordinal_5cat)

## -------------------------------------------- 
## Test for X2  df  probability 
## -------------------------------------------- 
## Omnibus      14.59   15  0.48
## x1       0.48    3   0.92
## x2       5.78    3   0.12
## x3yes        0.87    3   0.83
## x4yes        3.07    3   0.38
## x5yes        5.07    3   0.17
## -------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds

## Warning in brant(model_ordinal_5cat): 14 combinations in table(dv,ivs) do not
## occur. Because of that, the test results might be invalid.

# Print the stored Brant test result (X2, df and probability per term).
print(uji_brant)

##                 X2 df probability
## Omnibus 14.5870965 15   0.4815475
## x1       0.4830582  3   0.9225974
## x2       5.7825315  3   0.1226834
## x3yes    0.8692029  3   0.8328528
## x4yes    3.0736296  3   0.3804117
## x5yes    5.0719323  3   0.1666050