# For details about this material, please visit https://medium.com/@hafizah.ilma
# Read the accreditation data and keep only columns 3 to 5
# (Status, Akreditasi_1, Akreditasi_2), then preview the first three rows.
data <- read.csv("data_akreditasi.csv")[, 3:5]
head(data, n = 3)
## Status Akreditasi_1 Akreditasi_2
## 1 Negeri B A
## 2 Negeri B A
## 3 Negeri B A
# Encode the categorical columns: Status and Akreditasi_1 become plain
# (unordered) factors, while the response Akreditasi_2 becomes an ordered
# factor ranked TT < C < B < A.
data[c("Status", "Akreditasi_1")] <- lapply(
  data[c("Status", "Akreditasi_1")],
  as.factor
)
data[["Akreditasi_2"]] <- factor(
  data[["Akreditasi_2"]],
  levels = c("TT", "C", "B", "A"),
  ordered = TRUE
)
# Inspect the resulting column types.
str(data)
## 'data.frame': 120 obs. of 3 variables:
## $ Status : Factor w/ 2 levels "Negeri","Swasta": 1 1 1 2 1 1 1 1 2 1 ...
## $ Akreditasi_1: Factor w/ 4 levels "A","B","C","TT": 2 2 2 2 2 1 3 3 1 2 ...
## $ Akreditasi_2: Ord.factor w/ 4 levels "TT"<"C"<"B"<"A": 4 4 4 3 3 4 3 4 4 3 ...
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Fix the RNG seed so the train/test partition is reproducible.
set.seed(100)
# acak holds the row indices drawn for training (70% of the rows, sampled
# on the response Akreditasi_2); the remaining rows form the test set.
acak <- createDataPartition(
  y = data[["Akreditasi_2"]],
  p = 0.7,
  list = FALSE
)
data.train <- data[acak, ]
data.test <- data[-acak, ]
library(MASS)
# Fit two ordinal logistic regression models on the training data:
#   model0 - intercept-only baseline,
#   model1 - all remaining columns as predictors.
model0 <- polr(Akreditasi_2 ~ 1, method = "logistic", data = data.train)
# Hess = TRUE keeps the Hessian in the fit so that summary() can compute
# standard errors. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved constant.
model1 <- polr(
  Akreditasi_2 ~ .,
  method = "logistic",
  data = data.train,
  Hess = TRUE
)
summary(model1)
## Call:
## polr(formula = Akreditasi_2 ~ ., data = data.train, Hess = T,
## method = "logistic")
##
## Coefficients:
## Value Std. Error t value
## StatusSwasta -1.6556 0.5836 -2.8370
## Akreditasi_1B -1.7947 0.5902 -3.0408
## Akreditasi_1C -2.5321 0.7904 -3.2035
## Akreditasi_1TT -0.8424 1.3664 -0.6165
##
## Intercepts:
## Value Std. Error t value
## TT|C -4.5951 0.7761 -5.9208
## C|B -4.1196 0.7499 -5.4934
## B|A -2.8311 0.6720 -4.2129
##
## Residual Deviance: 155.8561
## AIC: 169.8561
library(lmtest)
# Likelihood-ratio test comparing the intercept-only model (model0) with
# the model that includes the predictors (model1).
lmtest::lrtest(model0, model1)
## Likelihood ratio test
##
## Model 1: Akreditasi_2 ~ 1
## Model 2: Akreditasi_2 ~ Status + Akreditasi_1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 3 -91.578
## 2 7 -77.928 4 27.301 1.728e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Sign-flipped copy of the fitted slope coefficients.
coefmodel <- -model1$coefficients
# Coefficient matrix (Value, Std. Error, t value) from the model summary.
koefisien <- coef(summary(model1))
# Compute two-sided p-values, treating the reported t values as standard
# normal statistics.
p <- 2 * pnorm(abs(koefisien[, "t value"]), lower.tail = FALSE)
# Append the rounded p-values to the rounded coefficient matrix and show it.
ctabel <- cbind(round(koefisien, 2), "pvalue" = round(p, 3))
ctabel
## Value Std. Error t value pvalue
## StatusSwasta -1.66 0.58 -2.84 0.005
## Akreditasi_1B -1.79 0.59 -3.04 0.002
## Akreditasi_1C -2.53 0.79 -3.20 0.001
## Akreditasi_1TT -0.84 1.37 -0.62 0.538
## TT|C -4.60 0.78 -5.92 0.000
## C|B -4.12 0.75 -5.49 0.000
## B|A -2.83 0.67 -4.21 0.000
# Predicted class-membership probabilities for the test rows: one row per
# observation, one column per accreditation level.
# Uses `<-` for assignment, consistent with the rest of the script, and the
# documented type value "probs" instead of relying on partial matching of
# "prob".
predict_prob <- predict(model1, newdata = data.test, type = "probs")
head(predict_prob)
## TT C B A
## 1 0.05730169 0.03177851 0.1727637 0.7381561
## 7 0.11274921 0.05699187 0.2560726 0.5741864
## 9 0.05023676 0.02818637 0.1574470 0.7641298
## 11 0.24144364 0.09721384 0.3113856 0.3499569
## 18 0.05023676 0.02818637 0.1574470 0.7641298
## 27 0.24144364 0.09721384 0.3113856 0.3499569
# Predicted accreditation class for every test row.
prediksi.test <- predict(model1, newdata = data.test, type = "class")
# NOTE(review): Akreditasi_2 was already converted to an ordered factor
# before the split, so this as.factor() call looks like a no-op - confirm.
data.test$Akreditasi_2 <- as.factor(data.test$Akreditasi_2)
# Cross-tabulate predictions against the observed classes.
# NOTE(review): `positive` presumably only matters for two-class problems;
# with four classes it appears to have no effect - verify against caret docs.
confusionMatrix(
  data = as.factor(prediksi.test),
  reference = data.test$Akreditasi_2,
  positive = "A"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TT C B A
## TT 0 1 1 0
## C 0 0 0 0
## B 0 0 0 0
## A 4 0 6 22
##
## Overall Statistics
##
## Accuracy : 0.6471
## 95% CI : (0.4649, 0.8025)
## No Information Rate : 0.6471
## P-Value [Acc > NIR] : 0.5778
##
## Kappa : 0.0811
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: TT Class: C Class: B Class: A
## Sensitivity 0.00000 0.00000 0.0000 1.0000
## Specificity 0.93333 1.00000 1.0000 0.1667
## Pos Pred Value 0.00000 NaN NaN 0.6875
## Neg Pred Value 0.87500 0.97059 0.7941 1.0000
## Prevalence 0.11765 0.02941 0.2059 0.6471
## Detection Rate 0.00000 0.00000 0.0000 0.6471
## Detection Prevalence 0.05882 0.00000 0.0000 0.9412
## Balanced Accuracy 0.46667 0.50000 0.5000 0.5833