1. Mempersiapkan library
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
library(ResourceSelection)
## ResourceSelection 0.3-6   2023-06-27
library(readxl)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
  1. Menginport dana
library(readxl)
Biner_1 <- read_excel("C:/Users/halfi/OneDrive/Documents/Biner - Kelulusan Mahasiswa/Biner 1.xlsx")
View(Biner_1)
str(Biner_1)
## tibble [1,762 × 8] (S3: tbl_df/tbl/data.frame)
##  $ NIM     : num [1:1762] 1.81e+08 1.81e+08 1.81e+08 1.81e+08 1.81e+08 ...
##  $ Status  : chr [1:1762] "Tepat Waktu" "Tidak Tepat" "Tidak Tepat" "Tidak Tepat" ...
##  $ Asal    : chr [1:1762] "Surabaya" "Luar Surabaya" "Luar Surabaya" "Surabaya" ...
##  $ IPK     : chr [1:1762] "pujian" "sangat memuaskan" "memuaskan" "sangat memuaskan" ...
##  $ Kelas   : chr [1:1762] "Pagi" "Pagi" "Pagi" "Pagi" ...
##  $ Kelompok: chr [1:1762] "Soshum" "Soshum" "Soshum" "Soshum" ...
##  $ JK      : chr [1:1762] "P" "P" "P" "P" ...
##  $ Usia    : num [1:1762] 56 22 55 22 30 22 22 22 22 24 ...
  1. Visualisasi data
library(plotrix)
mytable <- table(Biner_1$Status)
lbls <- c("Tepat Waktu","Tidak Tepat")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
    main="Status Kelulusan",
    col=c("aquamarine","lightblue"))

4. Mengubah data menjadi data faktor

Biner_1$Status <- as.factor(Biner_1$Status)
Biner_1$Asal <- as.factor(Biner_1$Asal)
Biner_1$IPK <- as.factor(Biner_1$IPK)
Biner_1$Kelas <- as.factor(Biner_1$Kelas)
Biner_1$Kelompok <- as.factor(Biner_1$Kelompok)
Biner_1$JK <- as.factor(Biner_1$JK)
  1. Tabel kontingensi
data2 <- table(Biner_1$Status,Biner_1$Asal)
data2
##              
##               Luar Surabaya Surabaya
##   Tepat Waktu          1093      301
##   Tidak Tepat           274       94
data3 <- table(Biner_1$Status,Biner_1$IPK)
data3
##              
##               memuaskan pujian sangat memuaskan
##   Tepat Waktu         1    904              489
##   Tidak Tepat       150     24              194
data4 <- table(Biner_1$Status,Biner_1$Kelas)
data4
##              
##               Malam Pagi
##   Tepat Waktu   137 1257
##   Tidak Tepat    70  298
data5 <- table(Biner_1$Status,Biner_1$Kelompok)
data5
##              
##               Saintek Soshum
##   Tepat Waktu     381   1013
##   Tidak Tepat     109    259
data6 <- table(Biner_1$Status,Biner_1$JK)
data6
##              
##                 L   P
##   Tepat Waktu 428 966
##   Tidak Tepat 217 151
  1. uji Independensi
c2=chisq.test(Biner_1$Status, Biner_1$Asal)
c2
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Biner_1$Status and Biner_1$Asal
## X-squared = 2.3908, df = 1, p-value = 0.122
c3=chisq.test(Biner_1$Status, Biner_1$IPK)
c3
## 
##  Pearson's Chi-squared test
## 
## data:  Biner_1$Status and Biner_1$IPK
## X-squared = 773.89, df = 2, p-value < 2.2e-16
c4=chisq.test(Biner_1$Status, Biner_1$Kelas)
c4
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Biner_1$Status and Biner_1$Kelas
## X-squared = 22.858, df = 1, p-value = 1.744e-06
c5=chisq.test(Biner_1$Status, Biner_1$Kelompok)
c5
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Biner_1$Status and Biner_1$Kelompok
## X-squared = 0.64958, df = 1, p-value = 0.4203
c6=chisq.test(Biner_1$Status, Biner_1$JK)
c6
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Biner_1$Status and Biner_1$JK
## X-squared = 99.012, df = 1, p-value < 2.2e-16
  1. Uji Univariate
M2 <- glm(as.factor(Biner_1$Status)~Asal, family = binomial(link = "logit"), data = Biner_1)
M3 <- glm(as.factor(Biner_1$Status)~IPK, family = binomial(link = "logit"), data = Biner_1)
M4 <- glm(as.factor(Biner_1$Status)~Kelas, family = binomial(link = "logit"), data = Biner_1)
M5 <- glm(as.factor(Biner_1$Status)~Kelompok, family = binomial(link = "logit"), data = Biner_1)
M6 <- glm(as.factor(Biner_1$Status)~JK, family = binomial(link = "logit"), data = Biner_1)
M7 <- glm(as.factor(Biner_1$Status)~Usia, family = binomial(link = "logit"), data = Biner_1)
summary(M2)
## 
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Asal, family = binomial(link = "logit"), 
##     data = Biner_1)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.38355    0.06756 -20.478   <2e-16 ***
## AsalSurabaya  0.21974    0.13611   1.614    0.106    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1803.3  on 1760  degrees of freedom
## AIC: 1807.3
## 
## Number of Fisher Scoring iterations: 4
summary(M3)
## 
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ IPK, family = binomial(link = "logit"), 
##     data = Biner_1)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            5.011      1.003   4.995 5.89e-07 ***
## IPKpujian             -8.639      1.024  -8.435  < 2e-16 ***
## IPKsangat memuaskan   -5.935      1.007  -5.895 3.74e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1050.0  on 1759  degrees of freedom
## AIC: 1056
## 
## Number of Fisher Scoring iterations: 7
summary(M4)
## 
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Kelas, family = binomial(link = "logit"), 
##     data = Biner_1)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.6715     0.1469  -4.570 4.87e-06 ***
## KelasPagi    -0.7679     0.1604  -4.787 1.70e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1784.4  on 1760  degrees of freedom
## AIC: 1788.4
## 
## Number of Fisher Scoring iterations: 4
summary(M5)
## 
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Kelompok, family = binomial(link = "logit"), 
##     data = Biner_1)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.2515     0.1086 -11.521   <2e-16 ***
## KelompokSoshum  -0.1124     0.1290  -0.871    0.384    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1805.1  on 1760  degrees of freedom
## AIC: 1809.1
## 
## Number of Fisher Scoring iterations: 4
summary(M6)
## 
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ JK, family = binomial(link = "logit"), 
##     data = Biner_1)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.67923    0.08334  -8.151 3.62e-16 ***
## JKP         -1.17666    0.12084  -9.737  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1708.8  on 1760  degrees of freedom
## AIC: 1712.8
## 
## Number of Fisher Scoring iterations: 4
summary(M7)
## 
## Call:
## glm(formula = as.factor(Biner_1$Status) ~ Usia, family = binomial(link = "logit"), 
##     data = Biner_1)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.96464    0.68198  -5.813 6.12e-09 ***
## Usia         0.11467    0.02957   3.879 0.000105 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1788.0  on 1760  degrees of freedom
## AIC: 1792
## 
## Number of Fisher Scoring iterations: 4
  1. Uji Multivaraite #Parsial
logit1 = glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas+Biner_1$JK, data = Biner_1, family = binomial)
summary(logit1)
## 
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas + 
##     Biner_1$JK, family = binomial, data = Biner_1)
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   5.7484     1.0230   5.619 1.92e-08 ***
## Biner_1$IPKpujian            -8.7884     1.0303  -8.530  < 2e-16 ***
## Biner_1$IPKsangat memuaskan  -5.9905     1.0081  -5.942 2.81e-09 ***
## Biner_1$KelasPagi            -0.9071     0.2206  -4.112 3.93e-05 ***
## Biner_1$JKP                   0.2175     0.1654   1.315    0.189    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1033.3  on 1757  degrees of freedom
## AIC: 1043.3
## 
## Number of Fisher Scoring iterations: 7
logit2 = glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas, data = Biner_1, family = binomial)
summary(logit2)
## 
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas, family = binomial, 
##     data = Biner_1)
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   5.7624     1.0229   5.634 1.76e-08 ***
## Biner_1$IPKpujian            -8.6577     1.0249  -8.448  < 2e-16 ***
## Biner_1$IPKsangat memuaskan  -5.9380     1.0071  -5.896 3.72e-09 ***
## Biner_1$KelasPagi            -0.8658     0.2179  -3.973 7.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1035.0  on 1758  degrees of freedom
## AIC: 1043
## 
## Number of Fisher Scoring iterations: 7

#serentak

library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(logit1)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -516.6490923 -902.9083155  772.5184464    0.4277945    0.3549534    0.5536147
qchisq(0.90,4)
## [1] 7.77944

#setelah uji independensi

logit3= glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas+Biner_1$JK+Biner_1$Usia, data = Biner_1, family = binomial)
summary(logit3)
## 
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas + 
##     Biner_1$JK + Biner_1$Usia, family = binomial, data = Biner_1)
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  5.06402    1.43971   3.517 0.000436 ***
## Biner_1$IPKpujian           -8.77632    1.03039  -8.517  < 2e-16 ***
## Biner_1$IPKsangat memuaskan -5.98145    1.00822  -5.933 2.98e-09 ***
## Biner_1$KelasPagi           -0.86964    0.22767  -3.820 0.000134 ***
## Biner_1$JKP                  0.22829    0.16632   1.373 0.169873    
## Biner_1$Usia                 0.02773    0.04125   0.672 0.501377    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1032.9  on 1756  degrees of freedom
## AIC: 1044.9
## 
## Number of Fisher Scoring iterations: 7
logit4= glm(Biner_1$Status~Biner_1$IPK+Biner_1$Kelas, data = Biner_1, family = binomial)
summary(logit4)
## 
## Call:
## glm(formula = Biner_1$Status ~ Biner_1$IPK + Biner_1$Kelas, family = binomial, 
##     data = Biner_1)
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   5.7624     1.0229   5.634 1.76e-08 ***
## Biner_1$IPKpujian            -8.6577     1.0249  -8.448  < 2e-16 ***
## Biner_1$IPKsangat memuaskan  -5.9380     1.0071  -5.896 3.72e-09 ***
## Biner_1$KelasPagi            -0.8658     0.2179  -3.973 7.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1035.0  on 1758  degrees of freedom
## AIC: 1043
## 
## Number of Fisher Scoring iterations: 7

#Uji serentak #serentak

library(pscl)
pR2(logit4)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -517.5174056 -902.9083155  770.7818199    0.4268328    0.3543173    0.5526226
qchisq(0.90,4)
## [1] 7.77944

#Pemilihan Model Terbaik

AIC(logit1)
## [1] 1043.298
AIC(logit2)
## [1] 1043.035
AIC(logit3)
## [1] 1044.861
AIC(logit4)
## [1] 1043.035
  1. Klasifikasi
prob.predik <- predict(logit2, Biner_1, type = "response")
prediksi <- ifelse(prob.predik>0.5, "Tidak Tepat", "Tepat Waktu" )
library(caret)
pred.aktual <- data.frame(prediksi, Biner_1$Status)
confusionMatrix(as.factor(prediksi), Biner_1$Status)
## Confusion Matrix and Statistics
## 
##              Reference
## Prediction    Tepat Waktu Tidak Tepat
##   Tepat Waktu        1393         218
##   Tidak Tepat           1         150
##                                           
##                Accuracy : 0.8757          
##                  95% CI : (0.8594, 0.8908)
##     No Information Rate : 0.7911          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5197          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9993          
##             Specificity : 0.4076          
##          Pos Pred Value : 0.8647          
##          Neg Pred Value : 0.9934          
##              Prevalence : 0.7911          
##          Detection Rate : 0.7906          
##    Detection Prevalence : 0.9143          
##       Balanced Accuracy : 0.7034          
##                                           
##        'Positive' Class : Tepat Waktu     
## 
tab1<- table(Predicted =prediksi, Actual =Biner_1$Status)
presisi <- precision(tab1)
presisi
## [1] 0.8646803
  1. odds ratio
exp(coef(logit2))
##                 (Intercept)           Biner_1$IPKpujian 
##                3.181168e+02                1.737915e-04 
## Biner_1$IPKsangat memuaskan           Biner_1$KelasPagi 
##                2.637297e-03                4.206955e-01