Library yang digunakan
library(readxl)
library(base)
library(stats)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ResourceSelection)
## ResourceSelection 0.3-5 2019-07-22
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
Import Data
data1 <- read_excel("data skripsi fuad.xlsx")
data1 <- na.omit(data1)
data1$Status<- as.factor(data1$Status)
data1$JK<- as.factor(data1$JK)
data1$Kelompok <- as.factor(data1$Kelompok)
data1$Asal<- as.factor(data1$Asal)
data1$Kelas<- as.factor(data1$Kelas)
data1$IPK <- as.factor(data1$IPK)
head(data1)
Tabulasi Silang
#tabel observasi
obsjk <- table(data1$Status,data1$JK)
obskelompok <- table(data1$Status,data1$Kelompok)
obsasal <- table(data1$Status,data1$Asal)
obskelas <- table(data1$Status,data1$Kelas)
obsipk <- table(data1$Status,data1$IPK)
#tabel ekspektasi
obsjk
##
## L P
## Tepat Waktu 428 966
## Tidak Tepat 217 151
chisq.test(obsjk)$expected
##
## L P
## Tepat Waktu 510.2894 883.7106
## Tidak Tepat 134.7106 233.2894
obskelompok
##
## Saintek Soshum
## Tepat Waktu 381 1013
## Tidak Tepat 109 259
chisq.test(obskelompok)$expected
##
## Saintek Soshum
## Tepat Waktu 387.6617 1006.3383
## Tidak Tepat 102.3383 265.6617
obsasal
##
## Luar Surabaya Surabaya
## Tepat Waktu 1093 301
## Tidak Tepat 274 94
chisq.test(obsasal)$expected
##
## Luar Surabaya Surabaya
## Tepat Waktu 1081.4972 312.50284
## Tidak Tepat 285.5028 82.49716
obskelas
##
## Malam Pagi
## Tepat Waktu 137 1257
## Tidak Tepat 70 298
chisq.test(obskelas)$expected
##
## Malam Pagi
## Tepat Waktu 163.76731 1230.2327
## Tidak Tepat 43.23269 324.7673
obsipk
##
## memuaskan pujian sangat memuaskan
## Tepat Waktu 1 904 489
## Tidak Tepat 150 24 194
chisq.test(obsipk)$expected
##
## memuaskan pujian sangat memuaskan
## Tepat Waktu 119.46311 734.1839 540.353
## Tidak Tepat 31.53689 193.8161 142.647
Uji Independensi
chisq.test(obsjk)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: obsjk
## X-squared = 99.012, df = 1, p-value < 2.2e-16
chisq.test(obskelompok)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: obskelompok
## X-squared = 0.64958, df = 1, p-value = 0.4203
chisq.test(obsasal)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: obsasal
## X-squared = 2.3908, df = 1, p-value = 0.122
chisq.test(obskelas)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: obskelas
## X-squared = 22.858, df = 1, p-value = 1.744e-06
chisq.test(obsipk)
##
## Pearson's Chi-squared test
##
## data: obsipk
## X-squared = 773.89, df = 2, p-value < 2.2e-16
chisq.test(table(data1$Status,data1$Usia))
## Warning in chisq.test(table(data1$Status, data1$Usia)): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table(data1$Status, data1$Usia)
## X-squared = 72.44, df = 20, p-value = 7.241e-08
Regresi Logistik Biner
data <- read_excel("data skripsi fuad.xlsx", sheet = "presh")
data <- na.omit(data)
data$Status<- as.factor(data$Status)
data$JK<- as.factor(data$JK)
data$Kelompok <- as.factor(data$Kelompok)
data$Asal<- as.factor(data$Asal)
data$Kelas<- as.factor(data$Kelas)
data$IPK <- as.factor(data$IPK)
model <- glm(Status~IPK+Kelas+JK+Usia,
data = data, family = "binomial")
summary(model)
##
## Call:
## glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial",
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8251 0.1959 0.2163 0.7330 3.1025
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.42266 1.38185 -3.201 0.001372 **
## IPK1 5.98145 1.00822 5.933 2.98e-09 ***
## IPK2 8.77632 1.03039 8.517 < 2e-16 ***
## Kelas1 -0.86964 0.22767 -3.820 0.000134 ***
## JK1 0.22829 0.16632 1.373 0.169873
## Usia -0.02773 0.04125 -0.672 0.501377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1032.9 on 1756 degrees of freedom
## AIC: 1044.9
##
## Number of Fisher Scoring iterations: 7
Pengujian Signifikansi Parameter
Uji Serentak
lrtest(model)
Uji Parsial
regTermTest(model,"IPK")
## Wald test for IPK
## in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial",
## data = data)
## F = 91.20187 on 2 and 1756 df: p= < 2.22e-16
regTermTest(model,"Kelas")
## Wald test for Kelas
## in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial",
## data = data)
## F = 14.58995 on 1 and 1756 df: p= 0.00013827
regTermTest(model,"JK")
## Wald test for JK
## in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial",
## data = data)
## F = 1.884064 on 1 and 1756 df: p= 0.17005
regTermTest(model,"Usia")
## Wald test for Usia
## in glm(formula = Status ~ IPK + Kelas + JK + Usia, family = "binomial",
## data = data)
## F = 0.4520201 on 1 and 1756 df: p= 0.50147
Pemilihan Model Terbaik (Backward Elimination)
model_fit <- step(object = model, direction = "backward")
## Start: AIC=1044.86
## Status ~ IPK + Kelas + JK + Usia
##
## Df Deviance AIC
## - Usia 1 1033.3 1043.3
## - JK 1 1034.8 1044.8
## <none> 1032.9 1044.9
## - Kelas 1 1046.8 1056.8
## - IPK 2 1692.3 1700.3
##
## Step: AIC=1043.3
## Status ~ IPK + Kelas + JK
##
## Df Deviance AIC
## - JK 1 1035.0 1043.0
## <none> 1033.3 1043.3
## - Kelas 1 1049.3 1057.3
## - IPK 2 1698.4 1704.4
##
## Step: AIC=1043.03
## Status ~ IPK + Kelas
##
## Df Deviance AIC
## <none> 1035.0 1043.0
## - Kelas 1 1050.0 1056.0
## - IPK 2 1784.4 1788.4
summary(model_fit)
##
## Call:
## glm(formula = Status ~ IPK + Kelas, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7510 0.2144 0.2144 0.7775 3.1318
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.8966 1.0035 -4.879 1.06e-06 ***
## IPK1 5.9380 1.0071 5.896 3.72e-09 ***
## IPK2 8.6577 1.0249 8.448 < 2e-16 ***
## Kelas1 -0.8658 0.2179 -3.973 7.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1805.8 on 1761 degrees of freedom
## Residual deviance: 1035.0 on 1758 degrees of freedom
## AIC: 1043
##
## Number of Fisher Scoring iterations: 7
Uji Kesesuaian Model
hoslem.test(model_fit$y, fitted(model_fit))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: model_fit$y, fitted(model_fit)
## X-squared = 2.449, df = 8, p-value = 0.9641
Odds Ratio
koef=coef(model_fit)
odds_ratio=exp(koef)
odds_ratio
## (Intercept) IPK1 IPK2 Kelas1
## 7.472150e-03 3.791762e+02 5.754021e+03 4.206955e-01
Ketepatan Klasifikasi
predict_rl <-predict(model_fit, newdata=data, type="response")
fit_rl <- ifelse(predict_rl>0.5,1,0)
tab_rl <- table(data$Status,fit_rl)
confusionMatrix(tab_rl, positive="1")
## Confusion Matrix and Statistics
##
## fit_rl
## 0 1
## 0 150 218
## 1 1 1393
##
## Accuracy : 0.8757
## 95% CI : (0.8594, 0.8908)
## No Information Rate : 0.9143
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.5197
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8647
## Specificity : 0.9934
## Pos Pred Value : 0.9993
## Neg Pred Value : 0.4076
## Prevalence : 0.9143
## Detection Rate : 0.7906
## Detection Prevalence : 0.7911
## Balanced Accuracy : 0.9290
##
## 'Positive' Class : 1
##