PACKAGE YANG DIGUNAKAN
# PACKAGE YANG DIGUNAKAN
library(readxl)
library(MASS)
library(ordinal)
library(caret)
library(dplyr)
library(pscl)
IMPORT DATA
# IMPORT DATA
raw_data <- read_excel("C:/Users/dals/OneDrive - untirta.ac.id/KULIAH/Sem 5/data ordinal.xlsx",
skip = 1)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
data_ordinal <- as.data.frame(raw_data)
MENGGANTI NAMA KOLOM
# MENGGANTI NAMA KOLOM
new_names <- c(
paste0("x", 1:5),
paste0("M", 1:4),
paste0("I", 1:4),
paste0("S", 1:4),
paste0("C", 1:4),
paste0("P", 1:4),
paste0("L", 1:4)
)
colnames(data_ordinal) <- new_names
BUAT VARIABEL KOMPOSIT X1–X6
# BUAT VARIABEL KOMPOSIT X1–X6
data_ordinal$X1 <- rowMeans(data_ordinal[, c("M1","M2","M3","M4")], na.rm = TRUE)
data_ordinal$X2 <- rowMeans(data_ordinal[, c("I1","I2","I3","I4")], na.rm = TRUE)
data_ordinal$X3 <- rowMeans(data_ordinal[, c("C1","C2","C3","C4")], na.rm = TRUE)
data_ordinal$X4 <- rowMeans(data_ordinal[, c("S1","S2","S3","S4")], na.rm = TRUE)
data_ordinal$X5 <- rowMeans(data_ordinal[, c("P1","P2","P3","P4")], na.rm = TRUE)
data_ordinal$X6 <- rowMeans(data_ordinal[, c("L1","L2","L3","L4")], na.rm = TRUE)
BUAT VARIABEL Y (ORDINAL)
# BUAT VARIABEL Y (ORDINAL)
data_ordinal$Y <- ordered(
cut(
data_ordinal$x1,
breaks = c(-Inf, 50, 100, 150, Inf),
labels = c("Sangat Rendah","Rendah","Sedang","Tinggi")
),
levels = c("Sangat Rendah","Rendah","Sedang","Tinggi")
)
MODEL REGRESI LOGISTIK CLM
# MODEL REGRESI LOGISTIK CLM
model_clm <- clm(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data_ordinal)
summary(model_clm)
## formula: Y ~ X1 + X2 + X3 + X4 + X5 + X6
## data: data_ordinal
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 204 -280.36 578.73 3(0) 5.22e-08 1.6e+04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## X1 -0.1572 0.3632 -0.433 0.665
## X2 0.5559 0.3742 1.486 0.137
## X3 -0.2936 0.3933 -0.746 0.455
## X4 0.2031 0.3997 0.508 0.611
## X5 -0.5547 0.4306 -1.288 0.198
## X6 0.1732 0.2955 0.586 0.558
##
## Threshold coefficients:
## Estimate Std. Error z value
## Sangat Rendah|Rendah -1.4004 1.2654 -1.107
## Rendah|Sedang -0.3050 1.2658 -0.241
## Sedang|Tinggi 0.7728 1.2678 0.610
MODEL REGRESI LOGISTIK ORDINAL
# MODEL REGRESI LOGISTIK ORDINAL
model_polr <- polr(
Y ~ X1 + X2 + X3 + X4 + X5 + X6,
data = data_ordinal,
method = "logistic",
Hess = TRUE
)
summary(model_polr)
## Call:
## polr(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data_ordinal,
## Hess = TRUE, method = "logistic")
##
## Coefficients:
## Value Std. Error t value
## X1 -0.1572 0.3632 -0.4327
## X2 0.5559 0.3742 1.4857
## X3 -0.2935 0.3933 -0.7463
## X4 0.2031 0.3997 0.5081
## X5 -0.5547 0.4306 -1.2883
## X6 0.1732 0.2955 0.5860
##
## Intercepts:
## Value Std. Error t value
## Sangat Rendah|Rendah -1.4002 1.2654 -1.1065
## Rendah|Sedang -0.3049 1.2658 -0.2409
## Sedang|Tinggi 0.7729 1.2678 0.6097
##
## Residual Deviance: 560.7292
## AIC: 578.7292
ODDS RATIO
# ODDS RATIO
exp(cbind(OR = coef(model_polr), confint(model_polr)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## X1 0.8545400 0.4128099 1.729879
## X2 1.7435701 0.8413818 3.660259
## X3 0.7456148 0.3426054 1.607318
## X4 1.2251732 0.5621993 2.707560
## X5 0.5742225 0.2441091 1.326706
## X6 1.1890595 0.6647310 2.123515
PSEUDO R-SQUARE
# PSEUDO R-SQUARE
pR2(model_polr)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -2.803646e+02 -2.826879e+02 4.646551e+00 8.218518e-03 2.251977e-02
## r2CU
## 2.402291e-02
SPLIT DATA & OVERSAMPLING
# SPLIT DATA & OVERSAMPLING
set.seed(10)
trainindex <- sample(1:nrow(data_ordinal), size = 0.7*nrow(data_ordinal))
data_train <- data_ordinal[trainindex, ]
data_test <- data_ordinal[-trainindex, ]
data_train_bal <- downSample(
x = data_train[, c("X1","X2","X3","X4","X5","X6")],
y = data_train$Y,
yname = "Y"
)
model_ts <- polr(
Y ~ X1 + X2 + X3 + X4 + X5 + X6,
data = data_train_bal,
method = "logistic",
Hess = TRUE
)
EVALUASI MODEL
# EVALUASI MODEL
prediksi <- predict(model_ts, data_test)
confmat <- table(True = data_test$Y, Pred = prediksi)
confmat
## Pred
## True Sangat Rendah Rendah Sedang Tinggi
## Sangat Rendah 3 4 0 8
## Rendah 11 1 1 5
## Sedang 11 0 2 5
## Tinggi 3 1 1 6
confusionMatrix(confmat)
## Confusion Matrix and Statistics
##
## Pred
## True Sangat Rendah Rendah Sedang Tinggi
## Sangat Rendah 3 4 0 8
## Rendah 11 1 1 5
## Sedang 11 0 2 5
## Tinggi 3 1 1 6
##
## Overall Statistics
##
## Accuracy : 0.1935
## 95% CI : (0.1042, 0.3137)
## No Information Rate : 0.4516
## P-Value [Acc > NIR] : 0.9999947
##
## Kappa : -0.0403
##
## Mcnemar's Test P-Value : 0.0008402
##
## Statistics by Class:
##
## Class: Sangat Rendah Class: Rendah Class: Sedang
## Sensitivity 0.10714 0.16667 0.50000
## Specificity 0.64706 0.69643 0.72414
## Pos Pred Value 0.20000 0.05556 0.11111
## Neg Pred Value 0.46809 0.88636 0.95455
## Prevalence 0.45161 0.09677 0.06452
## Detection Rate 0.04839 0.01613 0.03226
## Detection Prevalence 0.24194 0.29032 0.29032
## Balanced Accuracy 0.37710 0.43155 0.61207
## Class: Tinggi
## Sensitivity 0.25000
## Specificity 0.86842
## Pos Pred Value 0.54545
## Neg Pred Value 0.64706
## Prevalence 0.38710
## Detection Rate 0.09677
## Detection Prevalence 0.17742
## Balanced Accuracy 0.55921