For details about the material, please visit https://medium.com/@hafizah.ilma

1 Preprocessing Data

1.1 Read Data

# Load the accreditation data and keep only columns 3 to 5
# (Status, Akreditasi_1 and Akreditasi_2 in the preview below).
data <- read.csv(file = "data_akreditasi.csv")[, 3:5]
head(data, n = 3)
##   Status Akreditasi_1 Akreditasi_2
## 1 Negeri            B            A
## 2 Negeri            B            A
## 3 Negeri            B            A

1.2 Define Data Types

# The two nominal columns become plain (unordered) factors.
data[c("Status", "Akreditasi_1")] <- lapply(
  data[c("Status", "Akreditasi_1")],
  as.factor
)
# Akreditasi_2 is ordinal: its levels are ordered TT < C < B < A.
data$Akreditasi_2 <- factor(
  data$Akreditasi_2,
  levels = c("TT", "C", "B", "A"),
  ordered = TRUE
)
str(data)
## 'data.frame':    120 obs. of  3 variables:
##  $ Status      : Factor w/ 2 levels "Negeri","Swasta": 1 1 1 2 1 1 1 1 2 1 ...
##  $ Akreditasi_1: Factor w/ 4 levels "A","B","C","TT": 2 2 2 2 2 1 3 3 1 2 ...
##  $ Akreditasi_2: Ord.factor w/ 4 levels "TT"<"C"<"B"<"A": 4 4 4 3 3 4 3 4 4 3 ...

1.3 Splitting Data

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Fix the RNG seed so the partition below is reproducible.
set.seed(100)
# Row indices for the training portion (p = 0.70), partitioned on Akreditasi_2.
# list = FALSE so the result can be used directly as a row index.
acak <- createDataPartition(y = data$Akreditasi_2, p = 0.70, list = FALSE)
# Selected rows form the training set; the remaining rows form the test set.
data.train <- data[acak, ]
data.test <- data[-acak, ]

2 Building Models

2.1 Model 0 (without independent variables)

library(MASS)
# Intercept-only model; it is the baseline in the likelihood ratio test below.
model0 <- polr(
  formula = Akreditasi_2 ~ 1,
  data = data.train,
  method = "logistic"
)

2.2 Model 1 (with independent variables)

# Ordinal logistic model of Akreditasi_2 on every other column of data.train.
# Hess = TRUE asks polr() to return the Hessian, which summary() uses for the
# standard errors. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned, which would silently change this argument.
model1 <- polr(
  formula = Akreditasi_2 ~ .,
  data = data.train,
  method = "logistic",
  Hess = TRUE
)
summary(model1)
## Call:
## polr(formula = Akreditasi_2 ~ ., data = data.train, Hess = TRUE, 
##     method = "logistic")
## 
## Coefficients:
##                  Value Std. Error t value
## StatusSwasta   -1.6556     0.5836 -2.8370
## Akreditasi_1B  -1.7947     0.5902 -3.0408
## Akreditasi_1C  -2.5321     0.7904 -3.2035
## Akreditasi_1TT -0.8424     1.3664 -0.6165
## 
## Intercepts:
##      Value   Std. Error t value
## TT|C -4.5951  0.7761    -5.9208
## C|B  -4.1196  0.7499    -5.4934
## B|A  -2.8311  0.6720    -4.2129
## 
## Residual Deviance: 155.8561 
## AIC: 169.8561

3 Simultaneous Test

library(lmtest) 
# Likelihood ratio test of the intercept-only model against the full model.
lrtest(model0, model1)
## Likelihood ratio test
## 
## Model 1: Akreditasi_2 ~ 1
## Model 2: Akreditasi_2 ~ Status + Akreditasi_1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   3 -91.578                         
## 2   7 -77.928  4 27.301  1.728e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4 Partial Test

# Negated coefficients of model1.
# NOTE(review): coefmodel is not used in the visible part of this file.
coefmodel <- c(-model1$coefficients)
# Coefficient table (Value, Std. Error, t value) taken from the model summary.
koefisien <- coef(summary(model1))
# Compute two-sided p-values, treating the t values as standard normal.
p <- 2 * pnorm(abs(koefisien[, "t value"]), lower.tail = FALSE)
# Rounded coefficient table with the p-values appended as a last column.
ctabel <- cbind(round(koefisien, 2), "pvalue" = round(p, 3))
ctabel
##                Value Std. Error t value pvalue
## StatusSwasta   -1.66       0.58   -2.84  0.005
## Akreditasi_1B  -1.79       0.59   -3.04  0.002
## Akreditasi_1C  -2.53       0.79   -3.20  0.001
## Akreditasi_1TT -0.84       1.37   -0.62  0.538
## TT|C           -4.60       0.78   -5.92  0.000
## C|B            -4.12       0.75   -5.49  0.000
## B|A            -2.83       0.67   -4.21  0.000

5 Prediction on Test Data

# Predicted probability of each Akreditasi_2 level for the test rows.
# `<-` replaces `=` for assignment, and type is spelled out as "probs"
# instead of relying on partial matching of "prob".
predict_prob <- predict(model1, newdata = data.test, type = "probs")
head(predict_prob)
##            TT          C         B         A
## 1  0.05730169 0.03177851 0.1727637 0.7381561
## 7  0.11274921 0.05699187 0.2560726 0.5741864
## 9  0.05023676 0.02818637 0.1574470 0.7641298
## 11 0.24144364 0.09721384 0.3113856 0.3499569
## 18 0.05023676 0.02818637 0.1574470 0.7641298
## 27 0.24144364 0.09721384 0.3113856 0.3499569

6 Confusion Matrix of the Model

# Predicted class for each test row.
prediksi.test <- predict(model1, newdata = data.test, type = "class")
data.test$Akreditasi_2 <- as.factor(data.test$Akreditasi_2)
# Cross-tabulate predictions against the observed classes.
# NOTE(review): with four classes, `positive` presumably does not affect the
# printed statistics -- confirm against the caret documentation.
confusionMatrix(
  data = as.factor(prediksi.test),
  reference = data.test$Akreditasi_2,
  positive = "A"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction TT  C  B  A
##         TT  0  1  1  0
##         C   0  0  0  0
##         B   0  0  0  0
##         A   4  0  6 22
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6471          
##                  95% CI : (0.4649, 0.8025)
##     No Information Rate : 0.6471          
##     P-Value [Acc > NIR] : 0.5778          
##                                           
##                   Kappa : 0.0811          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: TT Class: C Class: B Class: A
## Sensitivity            0.00000  0.00000   0.0000   1.0000
## Specificity            0.93333  1.00000   1.0000   0.1667
## Pos Pred Value         0.00000      NaN      NaN   0.6875
## Neg Pred Value         0.87500  0.97059   0.7941   1.0000
## Prevalence             0.11765  0.02941   0.2059   0.6471
## Detection Rate         0.00000  0.00000   0.0000   0.6471
## Detection Prevalence   0.05882  0.00000   0.0000   0.9412
## Balanced Accuracy      0.46667  0.50000   0.5000   0.5833