library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
library(ResourceSelection)
## ResourceSelection 0.3-6 2023-06-27
library(readxl)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(readxl)
Biner_1 <- read_excel("C:/Users/halfi/OneDrive/Documents/Biner - Kelulusan Mahasiswa/Biner 1.xlsx")
View(Biner_1)
str(Biner_1)
## tibble [1,762 × 8] (S3: tbl_df/tbl/data.frame)
## $ NIM : num [1:1762] 1.81e+08 1.81e+08 1.81e+08 1.81e+08 1.81e+08 ...
## $ Status : chr [1:1762] "Tepat Waktu" "Tidak Tepat" "Tidak Tepat" "Tidak Tepat" ...
## $ Asal : chr [1:1762] "Surabaya" "Luar Surabaya" "Luar Surabaya" "Surabaya" ...
## $ IPK : chr [1:1762] "pujian" "sangat memuaskan" "memuaskan" "sangat memuaskan" ...
## $ Kelas : chr [1:1762] "Pagi" "Pagi" "Pagi" "Pagi" ...
## $ Kelompok: chr [1:1762] "Soshum" "Soshum" "Soshum" "Soshum" ...
## $ JK : chr [1:1762] "P" "P" "P" "P" ...
## $ Usia : num [1:1762] 56 22 55 22 30 22 22 22 22 24 ...
library(plotrix)
mytable <- table(Biner_1$Status)
lbls <- c("Tepat Waktu","Tidak Tepat")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
main="Status Kelulusan",
col=c("aquamarine","lightblue"))
4. Mengubah data menjadi data faktor
Biner_1$Status <- as.factor(Biner_1$Status)
Biner_1$Asal <- as.factor(Biner_1$Asal)
Biner_1$IPK <- as.factor(Biner_1$IPK)
Biner_1$Kelas <- as.factor(Biner_1$Kelas)
Biner_1$Kelompok <- as.factor(Biner_1$Kelompok)
Biner_1$JK <- as.factor(Biner_1$JK)
data2 <- table(Biner_1$Status,Biner_1$Asal)
data2
##
## Luar Surabaya Surabaya
## Tepat Waktu 1093 301
## Tidak Tepat 274 94
data3 <- table(Biner_1$Status,Biner_1$IPK)
data3
##
## memuaskan pujian sangat memuaskan
## Tepat Waktu 1 904 489
## Tidak Tepat 150 24 194
data4 <- table(Biner_1$Status,Biner_1$Kelas)
data4
##
## Malam Pagi
## Tepat Waktu 137 1257
## Tidak Tepat 70 298
data5 <- table(Biner_1$Status,Biner_1$Kelompok)
data5
##
## Saintek Soshum
## Tepat Waktu 381 1013
## Tidak Tepat 109 259
data6 <- table(Biner_1$Status,Biner_1$JK)
data6
##
## L P
## Tepat Waktu 428 966
## Tidak Tepat 217 151
c2=chisq.test(Biner_1$Status, Biner_1$Asal)
c2
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Biner_1$Status and Biner_1$Asal
## X-squared = 2.3908, df = 1, p-value = 0.122
c3=chisq.test(Biner_1$Status, Biner_1$IPK)
c3
##
## Pearson's Chi-squared test
##
## data: Biner_1$Status and Biner_1$IPK
## X-squared = 773.89, df = 2, p-value < 2.2e-16
c4=chisq.test(Biner_1$Status, Biner_1$Kelas)
c4
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Biner_1$Status and Biner_1$Kelas
## X-squared = 22.858, df = 1, p-value = 1.744e-06
c5=chisq.test(Biner_1$Status, Biner_1$Kelompok)
c5
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Biner_1$Status and Biner_1$Kelompok
## X-squared = 0.64958, df = 1, p-value = 0.4203
c6=chisq.test(Biner_1$Status, Biner_1$JK)
c6
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Biner_1$Status and Biner_1$JK
## X-squared = 99.012, df = 1, p-value < 2.2e-16
M2 <- glm(as.factor(Biner_1$Status)~Asal, family = binomial(link = "logit"), data = Biner_1)
M3 <- glm(as.factor(Biner_1$Status)~IPK, family = binomial(link = "logit"), data = Biner_1)
M4 <- glm(as.factor(Biner_1$Status)~Kelas, family = binomial(link = "logit"), data = Biner_1)
M5 <- glm(as.factor(Biner_1$Status)~Kelompok, family = binomial(link = "logit"), data = Biner_1)
M6 <- glm(as.factor(Biner_1$Status)~JK, family = binomial(link = "logit"), data = Biner_1)
M7 <- glm(as.factor(Biner_1$Status)~Usia, family = binomial(link = "logit"), data = Biner_1)
summary(M2)
##
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Asal, family = binomial(link = "logit"),
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.38355 0.06756 -20.478 <2e-16 ***
## AsalSurabaya 0.21974 0.13611 1.614 0.106
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1803.3 on 1760 degrees of freedom
## AIC: 1807.3
##
## Number of Fisher Scoring iterations: 4
summary(M3)
##
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ IPK, family = binomial(link = "logit"),
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.011 1.003 4.995 5.89e-07 ***
## IPKpujian -8.639 1.024 -8.435 < 2e-16 ***
## IPKsangat memuaskan -5.935 1.007 -5.895 3.74e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1050.0 on 1759 degrees of freedom
## AIC: 1056
##
## Number of Fisher Scoring iterations: 7
summary(M4)
##
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Kelas, family = binomial(link = "logit"),
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6715 0.1469 -4.570 4.87e-06 ***
## KelasPagi -0.7679 0.1604 -4.787 1.70e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1784.4 on 1760 degrees of freedom
## AIC: 1788.4
##
## Number of Fisher Scoring iterations: 4
summary(M5)
##
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Kelompok, family = binomial(link = "logit"),
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2515 0.1086 -11.521 <2e-16 ***
## KelompokSoshum -0.1124 0.1290 -0.871 0.384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1805.1 on 1760 degrees of freedom
## AIC: 1809.1
##
## Number of Fisher Scoring iterations: 4
summary(M6)
##
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ JK, family = binomial(link = "logit"),
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.67923 0.08334 -8.151 3.62e-16 ***
## JKP -1.17666 0.12084 -9.737 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1708.8 on 1760 degrees of freedom
## AIC: 1712.8
##
## Number of Fisher Scoring iterations: 4
summary(M7)
##
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Usia, family = binomial(link = "logit"),
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.96464 0.68198 -5.813 6.12e-09 ***
## Usia 0.11467 0.02957 3.879 0.000105 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1788.0 on 1760 degrees of freedom
## AIC: 1792
##
## Number of Fisher Scoring iterations: 4
logit1 = glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas+Biner_1$JK, data = Biner_1, family = binomial)
summary(logit1)
##
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas +
## Biner_1$JK, family = binomial, data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.7484 1.0230 5.619 1.92e-08 ***
## Biner_1$IPKpujian -8.7884 1.0303 -8.530 < 2e-16 ***
## Biner_1$IPKsangat memuaskan -5.9905 1.0081 -5.942 2.81e-09 ***
## Biner_1$KelasPagi -0.9071 0.2206 -4.112 3.93e-05 ***
## Biner_1$JKP 0.2175 0.1654 1.315 0.189
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1033.3 on 1757 degrees of freedom
## AIC: 1043.3
##
## Number of Fisher Scoring iterations: 7
logit2 = glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas, data = Biner_1, family = binomial)
summary(logit2)
##
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas, family = binomial,
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.7624 1.0229 5.634 1.76e-08 ***
## Biner_1$IPKpujian -8.6577 1.0249 -8.448 < 2e-16 ***
## Biner_1$IPKsangat memuaskan -5.9380 1.0071 -5.896 3.72e-09 ***
## Biner_1$KelasPagi -0.8658 0.2179 -3.973 7.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1035.0 on 1758 degrees of freedom
## AIC: 1043
##
## Number of Fisher Scoring iterations: 7
#serentak
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(logit1)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -516.6490923 -902.9083155 772.5184464 0.4277945 0.3549534 0.5536147
qchisq(0.90,4)
## [1] 7.77944
#setelah uji independensi
logit3= glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas+Biner_1$JK+Biner_1$Usia, data = Biner_1, family = binomial)
summary(logit3)
##
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas +
## Biner_1$JK + Biner_1$Usia, family = binomial, data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.06402 1.43971 3.517 0.000436 ***
## Biner_1$IPKpujian -8.77632 1.03039 -8.517 < 2e-16 ***
## Biner_1$IPKsangat memuaskan -5.98145 1.00822 -5.933 2.98e-09 ***
## Biner_1$KelasPagi -0.86964 0.22767 -3.820 0.000134 ***
## Biner_1$JKP 0.22829 0.16632 1.373 0.169873
## Biner_1$Usia 0.02773 0.04125 0.672 0.501377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1032.9 on 1756 degrees of freedom
## AIC: 1044.9
##
## Number of Fisher Scoring iterations: 7
logit4= glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas, data = Biner_1, family = binomial)
summary(logit4)
##
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas, family = binomial,
## data = Biner_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.7624 1.0229 5.634 1.76e-08 ***
## Biner_1$IPKpujian -8.6577 1.0249 -8.448 < 2e-16 ***
## Biner_1$IPKsangat memuaskan -5.9380 1.0071 -5.896 3.72e-09 ***
## Biner_1$KelasPagi -0.8658 0.2179 -3.973 7.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1035.0 on 1758 degrees of freedom
## AIC: 1043
##
## Number of Fisher Scoring iterations: 7
#Uji serentak #serentak
library(pscl)
pR2(logit4)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -517.5174056 -902.9083155 770.7818199 0.4268328 0.3543173 0.5526226
qchisq(0.90,4)
## [1] 7.77944
#Pemilihan Model Terbaik
AIC(logit1)
## [1] 1043.298
AIC(logit2)
## [1] 1043.035
AIC(logit3)
## [1] 1044.861
AIC(logit4)
## [1] 1043.035
prob.predik <- predict(logit2, Biner_1, type = "response")
prediksi <- ifelse(prob.predik>0.5, "Tidak Tepat", "Tepat Waktu" )
library(caret)
pred.aktual <- data.frame(prediksi, Biner_1$Status)
confusionMatrix(as.factor(prediksi), Biner_1$Status)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Tepat Waktu Tidak Tepat
## Tepat Waktu 1393 218
## Tidak Tepat 1 150
##
## Accuracy : 0.8757
## 95% CI : (0.8594, 0.8908)
## No Information Rate : 0.7911
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5197
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9993
## Specificity : 0.4076
## Pos Pred Value : 0.8647
## Neg Pred Value : 0.9934
## Prevalence : 0.7911
## Detection Rate : 0.7906
## Detection Prevalence : 0.9143
## Balanced Accuracy : 0.7034
##
## 'Positive' Class : Tepat Waktu
##
tab1<- table(Predicted =prediksi, Actual =Biner_1$Status)
presisi <- precision(tab1)
presisi
## [1] 0.8646803
exp(coef(logit2))
## (Intercept) Biner_1$IPKpujian
## 3.181168e+02 1.737915e-04
## Biner_1$IPKsangat memuaskan Biner_1$KelasPagi
## 2.637297e-03 4.206955e-01