Regresi Logistik Ordinal

#install package

library(ggplot2)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(corrplot)

## corrplot 0.92 loaded

library(caret)

## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift

library(tidyr)
library(dplyr)
library(knitr)
library(readxl)
library(MASS)

## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select

library(ResourceSelection)

## ResourceSelection 0.3-6   2023-06-27

library(lattice)
library(ggplot2)
library(caret)

#input data

dataDBDRSI <- read_excel("D:/ADK/Ordinal - DBD/Data DBD RSI Jemursari.xlsx")
head(dataDBDRSI)

#keterangan

str(dataDBDRSI)

## tibble [28 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Y : num [1:28] 3 2 1 2 1 2 1 2 1 1 ...
##  $ X1: num [1:28] 7.16 2.58 10.11 4.42 6.27 ...

#mengubah variabel ke factor

dataDBDRSI$Y<- as.factor(dataDBDRSI$Y)
str(dataDBDRSI)

## tibble [28 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Y : Factor w/ 3 levels "1","2","3": 3 2 1 2 1 2 1 2 1 1 ...
##  $ X1: num [1:28] 7.16 2.58 10.11 4.42 6.27 ...

library(plotrix)
mytable <- table(dataDBDRSI$Y)
lbls <- c("Derajat I","Derajat II", "Derajat III", "Derajat IV")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
    main="DBD",
    col=c("brown","coral", "grey"))

hist(dataDBDRSI$X1,
      main = "Histogram Usia",
          xlab = "Tingkat Usia",
          ylab = "jumlah",
          col = "lightblue")

library(RColorBrewer)
par(bty="o")
boxplot(X1~Y, data=dataDBDRSI, col=brewer.pal(n = 4, name = "RdBu") , xlab="Kategori DBD dengan Usia", horizontal=TRUE)

# Uji asumsi ## Uji multikolineritas

m1 <- glm(as.factor(dataDBDRSI$Y)~X1 , family = binomial(link = "logit"), data = dataDBDRSI)
summary(m1)

## 
## Call:
## glm(formula = as.factor(dataDBDRSI$Y) ~ X1, family = binomial(link = "logit"), 
##     data = dataDBDRSI)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  0.68505    0.63117   1.085   0.2778  
## X1          -0.07198    0.04335  -1.660   0.0968 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 38.243  on 27  degrees of freedom
## Residual deviance: 33.471  on 26  degrees of freedom
## AIC: 37.471
## 
## Number of Fisher Scoring iterations: 5

VIF<10

#model

library(MASS)
modelDBDRSI <- polr(Y~X1, data=dataDBDRSI)
summary(modelDBDRSI)

## 
## Re-fitting to get Hessian

## Call:
## polr(formula = Y ~ X1, data = dataDBDRSI)
## 
## Coefficients:
##       Value Std. Error t value
## X1 -0.07189     0.0419  -1.716
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2 -0.6914  0.6190    -1.1170
## 2|3  0.6726  0.6533     1.0295
## 
## Residual Deviance: 49.59032 
## AIC: 55.59032

##uji serentak

library(pscl)

## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis

pR2(modelDBDRSI)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -24.79516084 -27.27174613   4.95317056   0.09081139   0.16213555   0.18909292

##chisquare tabel

qchisq(0.90, 1)

## [1] 2.705543

Berdasarkan output tersebut diperoleh informasi bahwa pada taraf signifikansi 10%, H0 ditolak karena G2 = 4,95317056 > 2,705543. Artinya, ada pengaruh variabel independen terhadap variabel dependen secara bersama-sama. ##partial test

coefmodel <- c(-modelDBDRSI$coefficients)
koefisien<-coef(summary(modelDBDRSI))

## 
## Re-fitting to get Hessian

#menghitung p-value
p<-pnorm(abs(koefisien[,"t value"]),lower.tail = FALSE)*2
(ctabel<-cbind <-cbind(round(koefisien,4),"p-value"=round(p,4)))

##       Value Std. Error t value p-value
## X1  -0.0719     0.0419 -1.7157  0.0862
## 1|2 -0.6914     0.6190 -1.1170  0.2640
## 2|3  0.6726     0.6533  1.0295  0.3033

##confussion matrix

prediksi.test <- predict(modelDBDRSI, dataDBDRSI, type="class")
dataDBDRSI$Y <- as.factor(dataDBDRSI$Y)
confusionMatrix(as.factor(prediksi.test), dataDBDRSI$Y, positive="1")

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 16  7  5
##          2  0  0  0
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5714          
##                  95% CI : (0.3718, 0.7554)
##     No Information Rate : 0.5714          
##     P-Value [Acc > NIR] : 0.579           
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            1.0000     0.00   0.0000
## Specificity            0.0000     1.00   1.0000
## Pos Pred Value         0.5714      NaN      NaN
## Neg Pred Value            NaN     0.75   0.8214
## Prevalence             0.5714     0.25   0.1786
## Detection Rate         0.5714     0.00   0.0000
## Detection Prevalence   1.0000     0.00   0.0000
## Balanced Accuracy      0.5000     0.50   0.5000

#odds ratio
data.frame(coef(modelDBDRSI), exp(coef(modelDBDRSI)))

Regresi Logistik Ordinal

Alfisyahrina Hapsery, Pingky Febriyanti, Izequela De Jesus Madeira, Risa Mudji Istiyawati

2023-07-18