Library yang digunakan

library(readxl)
library(base)
library(stats)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ResourceSelection)
## ResourceSelection 0.3-5   2019-07-22
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster

Import Data

data1 <- read_excel("data skripsi fuad.xlsx")
data1 <- na.omit(data1)
data1$Status<- as.factor(data1$Status)
data1$JK<- as.factor(data1$JK)
data1$Kelompok <- as.factor(data1$Kelompok)
data1$Asal<- as.factor(data1$Asal)
data1$Kelas<- as.factor(data1$Kelas)
data1$IPK <- as.factor(data1$IPK)
head(data1)

Tabulasi Silang

#tabel observasi
obsjk <- table(data1$Status,data1$JK)
obskelompok <- table(data1$Status,data1$Kelompok)
obsasal <- table(data1$Status,data1$Asal)
obskelas <- table(data1$Status,data1$Kelas)
obsipk <- table(data1$Status,data1$IPK)
#tabel ekspektasi
obsjk
##              
##                 L   P
##   Tepat Waktu 428 966
##   Tidak Tepat 217 151
chisq.test(obsjk)$expected
##              
##                      L        P
##   Tepat Waktu 510.2894 883.7106
##   Tidak Tepat 134.7106 233.2894
obskelompok
##              
##               Saintek Soshum
##   Tepat Waktu     381   1013
##   Tidak Tepat     109    259
chisq.test(obskelompok)$expected
##              
##                Saintek    Soshum
##   Tepat Waktu 387.6617 1006.3383
##   Tidak Tepat 102.3383  265.6617
obsasal
##              
##               Luar Surabaya Surabaya
##   Tepat Waktu          1093      301
##   Tidak Tepat           274       94
chisq.test(obsasal)$expected
##              
##               Luar Surabaya  Surabaya
##   Tepat Waktu     1081.4972 312.50284
##   Tidak Tepat      285.5028  82.49716
obskelas
##              
##               Malam Pagi
##   Tepat Waktu   137 1257
##   Tidak Tepat    70  298
chisq.test(obskelas)$expected
##              
##                   Malam      Pagi
##   Tepat Waktu 163.76731 1230.2327
##   Tidak Tepat  43.23269  324.7673
obsipk
##              
##               memuaskan pujian sangat memuaskan
##   Tepat Waktu         1    904              489
##   Tidak Tepat       150     24              194
chisq.test(obsipk)$expected
##              
##               memuaskan   pujian sangat memuaskan
##   Tepat Waktu 119.46311 734.1839          540.353
##   Tidak Tepat  31.53689 193.8161          142.647

Uji Independensi

chisq.test(obsjk)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  obsjk
## X-squared = 99.012, df = 1, p-value < 2.2e-16
chisq.test(obskelompok)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  obskelompok
## X-squared = 0.64958, df = 1, p-value = 0.4203
chisq.test(obsasal)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  obsasal
## X-squared = 2.3908, df = 1, p-value = 0.122
chisq.test(obskelas)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  obskelas
## X-squared = 22.858, df = 1, p-value = 1.744e-06
chisq.test(obsipk)
## 
##  Pearson's Chi-squared test
## 
## data:  obsipk
## X-squared = 773.89, df = 2, p-value < 2.2e-16
chisq.test(table(data1$Status,data1$Usia))
## Warning in chisq.test(table(data1$Status, data1$Usia)): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  table(data1$Status, data1$Usia)
## X-squared = 72.44, df = 20, p-value = 7.241e-08

Regresi Logistik Biner

data <- read_excel("data skripsi fuad.xlsx", sheet = "presh")
data <- na.omit(data)
data$Status<- as.factor(data$Status)
data$JK<- as.factor(data$JK)
data$Kelompok <- as.factor(data$Kelompok)
data$Asal<- as.factor(data$Asal)
data$Kelas<- as.factor(data$Kelas)
data$IPK <- as.factor(data$IPK)
model <- glm(Status~IPK+Kelas+JK+Usia,
             data = data, family = "binomial")
summary(model)
## 
## Call:
## glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial", 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8251   0.1959   0.2163   0.7330   3.1025  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.42266    1.38185  -3.201 0.001372 ** 
## IPK1         5.98145    1.00822   5.933 2.98e-09 ***
## IPK2         8.77632    1.03039   8.517  < 2e-16 ***
## Kelas1      -0.86964    0.22767  -3.820 0.000134 ***
## JK1          0.22829    0.16632   1.373 0.169873    
## Usia        -0.02773    0.04125  -0.672 0.501377    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1032.9  on 1756  degrees of freedom
## AIC: 1044.9
## 
## Number of Fisher Scoring iterations: 7

Pengujian Signifikansi Parameter

Uji Serentak

lrtest(model)

Uji Parsial

regTermTest(model,"IPK")
## Wald test for IPK
##  in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial", 
##     data = data)
## F =  91.20187  on  2  and  1756  df: p= < 2.22e-16
regTermTest(model,"Kelas")
## Wald test for Kelas
##  in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial", 
##     data = data)
## F =  14.58995  on  1  and  1756  df: p= 0.00013827
regTermTest(model,"JK")
## Wald test for JK
##  in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial", 
##     data = data)
## F =  1.884064  on  1  and  1756  df: p= 0.17005
regTermTest(model,"Usia")
## Wald test for Usia
##  in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial", 
##     data = data)
## F =  0.4520201  on  1  and  1756  df: p= 0.50147

Pemilihan Model Terbaik (Backward Elimination)

model_fit <- step(object = model, direction = "backward")
## Start:  AIC=1044.86
## Status ~ IPK + Kelas + JK + Usia
## 
##         Df Deviance    AIC
## - Usia   1   1033.3 1043.3
## - JK     1   1034.8 1044.8
## <none>       1032.9 1044.9
## - Kelas  1   1046.8 1056.8
## - IPK    2   1692.3 1700.3
## 
## Step:  AIC=1043.3
## Status ~ IPK + Kelas + JK
## 
##         Df Deviance    AIC
## - JK     1   1035.0 1043.0
## <none>       1033.3 1043.3
## - Kelas  1   1049.3 1057.3
## - IPK    2   1698.4 1704.4
## 
## Step:  AIC=1043.03
## Status ~ IPK + Kelas
## 
##         Df Deviance    AIC
## <none>       1035.0 1043.0
## - Kelas  1   1050.0 1056.0
## - IPK    2   1784.4 1788.4
summary(model_fit)
## 
## Call:
## glm(formula = Status ~ IPK + Kelas, family = "binomial", data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7510   0.2144   0.2144   0.7775   3.1318  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.8966     1.0035  -4.879 1.06e-06 ***
## IPK1          5.9380     1.0071   5.896 3.72e-09 ***
## IPK2          8.6577     1.0249   8.448  < 2e-16 ***
## Kelas1       -0.8658     0.2179  -3.973 7.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1805.8  on 1761  degrees of freedom
## Residual deviance: 1035.0  on 1758  degrees of freedom
## AIC: 1043
## 
## Number of Fisher Scoring iterations: 7

Uji Kesesuaian Model

hoslem.test(model_fit$y, fitted(model_fit))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  model_fit$y, fitted(model_fit)
## X-squared = 2.449, df = 8, p-value = 0.9641

Odds Ratio

koef=coef(model_fit)
odds_ratio=exp(koef)
odds_ratio
##  (Intercept)         IPK1         IPK2       Kelas1 
## 7.472150e-03 3.791762e+02 5.754021e+03 4.206955e-01

Ketepatan Klasifikasi

predict_rl <-predict(model_fit, newdata=data, type="response")
fit_rl <- ifelse(predict_rl>0.5,1,0)
tab_rl <- table(data$Status,fit_rl)
confusionMatrix(tab_rl, positive="1")
## Confusion Matrix and Statistics
## 
##    fit_rl
##        0    1
##   0  150  218
##   1    1 1393
##                                           
##                Accuracy : 0.8757          
##                  95% CI : (0.8594, 0.8908)
##     No Information Rate : 0.9143          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.5197          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8647          
##             Specificity : 0.9934          
##          Pos Pred Value : 0.9993          
##          Neg Pred Value : 0.4076          
##              Prevalence : 0.9143          
##          Detection Rate : 0.7906          
##    Detection Prevalence : 0.7911          
##       Balanced Accuracy : 0.9290          
##                                           
##        'Positive' Class : 1               
##