library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
Data diperoleh melalui link https://online.stat.psu.edu/stat857/sites/onlinecourses.science.psu.edu.stat857/files/german_credit/index.csv dan untuk peubah selain duration, previous.credit, credit.amount, dan age, diganti menjadi faktor.
credit <- read.csv("https://online.stat.psu.edu/stat857/sites/onlinecourses.science.psu.edu.stat857/files/german_credit/index.csv")
credit <- credit %>% mutate(across(-c(Duration.of.Credit..month.,
Credit.Amount,
Age..years.),as.factor))
str(credit)
## 'data.frame': 1000 obs. of 21 variables:
## $ Creditability : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ Account.Balance : Factor w/ 4 levels "1","2","3","4": 1 1 2 1 1 1 1 1 4 2 ...
## $ Duration.of.Credit..month. : int 18 9 12 12 12 10 8 6 18 24 ...
## $ Payment.Status.of.Previous.Credit: Factor w/ 5 levels "0","1","2","3",..: 5 5 3 5 5 5 5 5 5 3 ...
## $ Purpose : Factor w/ 10 levels "0","1","2","3",..: 3 1 9 1 1 1 1 1 4 4 ...
## $ Credit.Amount : int 1049 2799 841 2122 2171 2241 3398 1361 1098 3758 ...
## $ Value.Savings.Stocks : Factor w/ 5 levels "1","2","3","4",..: 1 1 2 1 1 1 1 1 1 3 ...
## $ Length.of.current.employment : Factor w/ 5 levels "1","2","3","4",..: 2 3 4 3 3 2 4 2 1 1 ...
## $ Instalment.per.cent : Factor w/ 4 levels "1","2","3","4": 4 2 2 3 4 1 1 2 4 1 ...
## $ Sex...Marital.Status : Factor w/ 4 levels "1","2","3","4": 2 3 2 3 3 3 3 3 2 2 ...
## $ Guarantors : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ Duration.in.Current.address : Factor w/ 4 levels "1","2","3","4": 4 2 4 2 4 3 4 4 4 4 ...
## $ Most.valuable.available.asset : Factor w/ 4 levels "1","2","3","4": 2 1 1 1 2 1 1 1 3 4 ...
## $ Age..years. : int 21 36 23 39 38 48 39 40 65 23 ...
## $ Concurrent.Credits : Factor w/ 3 levels "1","2","3": 3 3 3 3 1 3 3 3 3 3 ...
## $ Type.of.apartment : Factor w/ 3 levels "1","2","3": 1 1 1 1 2 1 2 2 2 1 ...
## $ No.of.Credits.at.this.Bank : Factor w/ 4 levels "1","2","3","4": 1 2 1 2 2 2 2 1 2 1 ...
## $ Occupation : Factor w/ 4 levels "1","2","3","4": 3 3 2 2 2 2 2 2 1 1 ...
## $ No.of.dependents : Factor w/ 2 levels "1","2": 1 2 1 2 1 2 1 2 1 1 ...
## $ Telephone : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Foreign.Worker : Factor w/ 2 levels "1","2": 1 1 1 2 2 2 2 2 1 1 ...
names(credit) <- c("creditability", "account.balance", "duration", "previous.credit", "purpose", "credit.amount", "saving.balance", "employment.year", "installment.rate", "marital.status", "guarantors", "duration.address", "valuable.asset", "age", "concurrent.credit", "apartment.type", "number.of.credit", "occupation", "dependents", "telephone", "foreign.worker")
credit2 <- credit[c("creditability", "account.balance", "duration", "credit.amount", "saving.balance", "employment.year", "installment.rate", "marital.status", "duration.address", "age","dependents", "number.of.credit", "occupation", "previous.credit")]
head(credit2, 10)
## creditability account.balance duration credit.amount saving.balance
## 1 1 1 18 1049 1
## 2 1 1 9 2799 1
## 3 1 2 12 841 2
## 4 1 1 12 2122 1
## 5 1 1 12 2171 1
## 6 1 1 10 2241 1
## 7 1 1 8 3398 1
## 8 1 1 6 1361 1
## 9 1 4 18 1098 1
## 10 1 2 24 3758 3
## employment.year installment.rate marital.status duration.address age
## 1 2 4 2 4 21
## 2 3 2 3 2 36
## 3 4 2 2 4 23
## 4 3 3 3 2 39
## 5 3 4 3 4 38
## 6 2 1 3 3 48
## 7 4 1 3 4 39
## 8 2 2 3 4 40
## 9 1 4 2 4 65
## 10 1 1 2 4 23
## dependents number.of.credit occupation previous.credit
## 1 1 1 3 4
## 2 2 2 3 4
## 3 1 1 2 2
## 4 2 2 2 4
## 5 1 2 2 4
## 6 2 2 2 4
## 7 1 2 2 4
## 8 2 1 2 4
## 9 1 2 1 4
## 10 1 1 1 2
#glimpse(credit)
glimpse(credit2)
## Rows: 1,000
## Columns: 14
## $ creditability <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ account.balance <fct> 1, 1, 2, 1, 1, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 1, 2,…
## $ duration <int> 18, 9, 12, 12, 12, 10, 8, 6, 18, 24, 11, 30, 6, 48, 1…
## $ credit.amount <int> 1049, 2799, 841, 2122, 2171, 2241, 3398, 1361, 1098, …
## $ saving.balance <fct> 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 2, 5, 3, 1, 3,…
## $ employment.year <fct> 2, 3, 4, 3, 3, 2, 4, 2, 1, 1, 3, 4, 4, 1, 4, 3, 3, 2,…
## $ installment.rate <fct> 4, 2, 2, 3, 4, 1, 1, 2, 4, 1, 2, 1, 1, 2, 2, 2, 1, 1,…
## $ marital.status <fct> 2, 3, 2, 3, 3, 3, 3, 3, 2, 2, 3, 4, 2, 3, 4, 3, 3, 4,…
## $ duration.address <fct> 4, 2, 4, 2, 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 4, 3, 2, 3,…
## $ age <int> 21, 36, 23, 39, 38, 48, 39, 40, 65, 23, 36, 24, 31, 3…
## $ dependents <fct> 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1,…
## $ number.of.credit <fct> 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 1,…
## $ occupation <fct> 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 2, 3, 2, 3,…
## $ previous.credit <fct> 4, 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 3, 2, 2, 4, 2,…
sum(is.na(credit2))
## [1] 0
sum(is.null(credit2))
## [1] 0
Didapatkan dari data, bahwa data tidak mengandung nilai Null atau NA.
Dilakukan pemecahan data menjadi data training (untuk membentuk model) dan data testing (untuk menguji kebaikan model), dengan proporsi 0.8:0.2.
#index_split <- rsample::initial_split(data = credit,
# prop = 0.8,
# strata = "Creditability"
# )
#train_credit <- rsample::training(index_split)
#test_credit <- rsample::testing(index_split)
set.seed(1); index_split <- rsample::initial_split(data = credit2,
prop = 0.8,
strata = "creditability"
)
set.seed(1); train_credit <- rsample::training(index_split)
set.seed(1); test_credit <- rsample::testing(index_split)
Model dibentuk dengan menggunakan data training dan diperoleh hasil sebagai berikut:
#logreg <- glm(Creditability~.,data=train_credit,family = "binomial")
logreg <- glm(creditability~.,data=train_credit,family = "binomial")
summary(logreg)
##
## Call:
## glm(formula = creditability ~ ., family = "binomial", data = train_credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7625 -0.7261 0.3844 0.7330 2.0848
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.019e+00 1.085e+00 0.939 0.347589
## account.balance2 4.588e-01 2.301e-01 1.994 0.046140 *
## account.balance3 1.106e+00 4.205e-01 2.631 0.008503 **
## account.balance4 1.885e+00 2.547e-01 7.398 1.38e-13 ***
## duration -3.725e-02 1.012e-02 -3.682 0.000231 ***
## credit.amount -8.668e-05 4.666e-05 -1.858 0.063203 .
## saving.balance2 1.131e-01 3.061e-01 0.369 0.711887
## saving.balance3 2.984e-01 4.493e-01 0.664 0.506644
## saving.balance4 1.151e+00 5.688e-01 2.023 0.043080 *
## saving.balance5 9.259e-01 2.823e-01 3.279 0.001041 **
## employment.year2 -2.533e-01 4.706e-01 -0.538 0.590444
## employment.year3 2.443e-01 4.506e-01 0.542 0.587666
## employment.year4 9.819e-01 4.941e-01 1.987 0.046878 *
## employment.year5 7.232e-02 4.529e-01 0.160 0.873141
## installment.rate2 -2.003e-01 3.351e-01 -0.598 0.549961
## installment.rate3 -5.243e-01 3.612e-01 -1.451 0.146670
## installment.rate4 -9.081e-01 3.181e-01 -2.854 0.004311 **
## marital.status2 2.640e-01 4.233e-01 0.624 0.532941
## marital.status3 1.006e+00 4.154e-01 2.421 0.015485 *
## marital.status4 6.581e-01 4.889e-01 1.346 0.178255
## duration.address2 -5.329e-01 3.223e-01 -1.653 0.098269 .
## duration.address3 -4.214e-01 3.583e-01 -1.176 0.239644
## duration.address4 -4.091e-01 3.161e-01 -1.294 0.195600
## age 1.163e-02 9.676e-03 1.202 0.229257
## dependents2 -4.099e-01 2.784e-01 -1.472 0.140942
## number.of.credit2 -4.338e-01 2.596e-01 -1.671 0.094707 .
## number.of.credit3 -1.276e-01 7.148e-01 -0.179 0.858275
## number.of.credit4 -1.435e+00 1.314e+00 -1.092 0.274922
## occupation2 -7.171e-01 7.757e-01 -0.924 0.355266
## occupation3 -7.767e-01 7.498e-01 -1.036 0.300235
## occupation4 -6.433e-01 7.535e-01 -0.854 0.393219
## previous.credit1 -6.318e-01 6.329e-01 -0.998 0.318198
## previous.credit2 4.083e-01 5.100e-01 0.801 0.423372
## previous.credit3 6.230e-01 5.461e-01 1.141 0.253908
## previous.credit4 1.396e+00 5.182e-01 2.693 0.007073 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 977.38 on 799 degrees of freedom
## Residual deviance: 738.25 on 765 degrees of freedom
## AIC: 808.25
##
## Number of Fisher Scoring iterations: 5
Dari pemodelan di atas, diperoleh terdapat beberapa peubah penjelas yang tidak berpengaruh signifikan pada taraf nyata 5% dalam menjelaskan peubah respon (creditability), antara lain: credit.amount, duration.address, age, dependents, number.of.credits, dan occupation. Peubah-peubah ini bisa dibuang dan menyisakan peubah-peubah yang berpengaruh signifikan saja, atau ditetapkan dalam model. Untuk memudahkan dalam interpretasi, maka diputuskan membuang peubah-peubah penjelas dan dilakukan pembuatan model baru dengan peubah sisanya.
logreg2 <- glm(creditability~account.balance+duration+saving.balance+employment.year+
installment.rate+marital.status+previous.credit,data=train_credit,family = "binomial")
summary(logreg2)
##
## Call:
## glm(formula = creditability ~ account.balance + duration + saving.balance +
## employment.year + installment.rate + marital.status + previous.credit,
## family = "binomial", data = train_credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7539 -0.7580 0.4054 0.7308 1.9463
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.123000 0.729883 0.169 0.86617
## account.balance2 0.473731 0.225339 2.102 0.03553 *
## account.balance3 1.307193 0.415105 3.149 0.00164 **
## account.balance4 1.826200 0.248846 7.339 2.16e-13 ***
## duration -0.047689 0.007721 -6.177 6.55e-10 ***
## saving.balance2 0.013504 0.297222 0.045 0.96376
## saving.balance3 0.427325 0.436213 0.980 0.32727
## saving.balance4 1.006886 0.546101 1.844 0.06522 .
## saving.balance5 0.900029 0.279078 3.225 0.00126 **
## employment.year2 -0.461522 0.393504 -1.173 0.24086
## employment.year3 -0.091738 0.370940 -0.247 0.80467
## employment.year4 0.652855 0.417226 1.565 0.11764
## employment.year5 -0.177679 0.391261 -0.454 0.64974
## installment.rate2 -0.157938 0.325849 -0.485 0.62789
## installment.rate3 -0.354829 0.345974 -1.026 0.30508
## installment.rate4 -0.647864 0.288858 -2.243 0.02491 *
## marital.status2 0.194593 0.409111 0.476 0.63432
## marital.status3 0.827485 0.402300 2.057 0.03970 *
## marital.status4 0.621675 0.476010 1.306 0.19155
## previous.credit1 -0.371396 0.596987 -0.622 0.53386
## previous.credit2 0.654365 0.468821 1.396 0.16279
## previous.credit3 0.576545 0.523052 1.102 0.27034
## previous.credit4 1.356065 0.494272 2.744 0.00608 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 977.38 on 799 degrees of freedom
## Residual deviance: 753.75 on 777 degrees of freedom
## AIC: 799.75
##
## Number of Fisher Scoring iterations: 5
Terdapat peubah installment.rate yang tidak signifikan dalam taraf nyata 5% dalam menjelaskan peubah respon (creditability), sehingga dengan langkah yang sama seperti sebelumnya diperoleh:
logreg3 <- glm(creditability~account.balance+duration+saving.balance+employment.year+
marital.status+previous.credit,data=train_credit,family = "binomial")
summary(logreg3)
##
## Call:
## glm(formula = creditability ~ account.balance + duration + saving.balance +
## employment.year + marital.status + previous.credit, family = "binomial",
## data = train_credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6850 -0.8206 0.4175 0.7299 2.1200
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.026911 0.705283 -0.038 0.969563
## account.balance2 0.503164 0.223632 2.250 0.024451 *
## account.balance3 1.369116 0.410339 3.337 0.000848 ***
## account.balance4 1.807905 0.247061 7.318 2.52e-13 ***
## duration -0.047628 0.007655 -6.221 4.93e-10 ***
## saving.balance2 0.028521 0.293679 0.097 0.922634
## saving.balance3 0.449697 0.433146 1.038 0.299172
## saving.balance4 0.959736 0.540175 1.777 0.075615 .
## saving.balance5 0.865278 0.277577 3.117 0.001825 **
## employment.year2 -0.520819 0.391335 -1.331 0.183229
## employment.year3 -0.110580 0.368976 -0.300 0.764411
## employment.year4 0.617826 0.415049 1.489 0.136603
## employment.year5 -0.279266 0.388180 -0.719 0.471879
## marital.status2 0.113485 0.405668 0.280 0.779670
## marital.status3 0.687437 0.396251 1.735 0.082767 .
## marital.status4 0.488384 0.472553 1.034 0.301369
## previous.credit1 -0.554916 0.591288 -0.938 0.347994
## previous.credit2 0.537196 0.466636 1.151 0.249645
## previous.credit3 0.502870 0.521507 0.964 0.334914
## previous.credit4 1.250488 0.491779 2.543 0.010997 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 977.38 on 799 degrees of freedom
## Residual deviance: 761.21 on 780 degrees of freedom
## AIC: 801.21
##
## Number of Fisher Scoring iterations: 5
logreg4 <- glm(creditability~account.balance+duration+saving.balance+previous.credit,
data=train_credit,family = "binomial")
summary(logreg4)
##
## Call:
## glm(formula = creditability ~ account.balance + duration + saving.balance +
## previous.credit, family = "binomial", data = train_credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7106 -0.8592 0.4480 0.7712 1.9555
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.110848 0.490485 0.226 0.821204
## account.balance2 0.448306 0.216979 2.066 0.038817 *
## account.balance3 1.185640 0.394718 3.004 0.002667 **
## account.balance4 1.804899 0.242043 7.457 8.86e-14 ***
## duration -0.039967 0.007205 -5.547 2.90e-08 ***
## saving.balance2 0.063589 0.288907 0.220 0.825792
## saving.balance3 0.478523 0.424153 1.128 0.259242
## saving.balance4 0.850819 0.526442 1.616 0.106057
## saving.balance5 0.899850 0.270871 3.322 0.000894 ***
## previous.credit1 -0.456384 0.576008 -0.792 0.428173
## previous.credit2 0.552842 0.450511 1.227 0.219768
## previous.credit3 0.593091 0.507433 1.169 0.242481
## previous.credit4 1.312042 0.476098 2.756 0.005854 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 977.38 on 799 degrees of freedom
## Residual deviance: 788.29 on 787 degrees of freedom
## AIC: 814.29
##
## Number of Fisher Scoring iterations: 5
Diperoleh peubah yang signifikan semuanya dalam menjelaskan creditability, yaitu account.balance, duration, saving.balance, dan previous.credit.
Asumsi independensi terpenuhi jika peubah prediktor tidak saling berkorelasi (tidak terjadi multikolinearitas).
plot(performance::check_collinearity(logreg4))
Diperoleh semua peubah memiliki nilai VIF < 5 (rendah), artinya tidak
ada peubah yang saling berkorelasi. Asumsi independensi terpenuhi.
Adanya pencilan akan mengakibatkan model tidak akurat. Sehingga keberadaan pencilan harus diuji.
performance::check_outliers(logreg4)
## OK: No outliers detected.
plot(performance::check_outliers(logreg4))
Diperoleh bahwa data tidak mengandung pencilan, atau tidak terdapat data yang letaknya di luar selang.
performance::performance(logreg4) %>%
parameters::display()
| AIC | BIC | Tjur’s R2 | RMSE | Sigma | Log_loss | Score_log | Score_spherical | PCP |
|---|---|---|---|---|---|---|---|---|
| 814.29 | 875.19 | 0.23 | 0.40 | 1.00 | 0.49 | -Inf | 1.25e-03 | 0.68 |
Diperoleh nilai RMSE yang cukup kecil, semakin kecil nilai RMSE (mendekati nol) diharapkan model yang dihasilkan semakin akurat.
performance::performance_hosmer(logreg4)
## # Hosmer-Lemeshow Goodness-of-Fit Test
##
## Chi-squared: 9.337
## df: 8
## p-value: 0.315
## Summary: model seems to fit well.
Uji GoF Hosmer Lemeshow digunakan untuk menguji kebaikan model. Hipotesis dalam pengujian ini adalah:
H0: Tidak ada perbedaan antara nilai pengamatan dan prediksi, model sudah baik. H1: Ada perbedaan antara nilai pengamatan dan prediksi.
Diperoleh nilai peluang lebih dari taraf nyata sebesar 5%, artinya model sudah baik dan layak digunakan untuk prediksi.
summary(logreg4)
##
## Call:
## glm(formula = creditability ~ account.balance + duration + saving.balance +
## previous.credit, family = "binomial", data = train_credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7106 -0.8592 0.4480 0.7712 1.9555
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.110848 0.490485 0.226 0.821204
## account.balance2 0.448306 0.216979 2.066 0.038817 *
## account.balance3 1.185640 0.394718 3.004 0.002667 **
## account.balance4 1.804899 0.242043 7.457 8.86e-14 ***
## duration -0.039967 0.007205 -5.547 2.90e-08 ***
## saving.balance2 0.063589 0.288907 0.220 0.825792
## saving.balance3 0.478523 0.424153 1.128 0.259242
## saving.balance4 0.850819 0.526442 1.616 0.106057
## saving.balance5 0.899850 0.270871 3.322 0.000894 ***
## previous.credit1 -0.456384 0.576008 -0.792 0.428173
## previous.credit2 0.552842 0.450511 1.227 0.219768
## previous.credit3 0.593091 0.507433 1.169 0.242481
## previous.credit4 1.312042 0.476098 2.756 0.005854 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 977.38 on 799 degrees of freedom
## Residual deviance: 788.29 on 787 degrees of freedom
## AIC: 814.29
##
## Number of Fisher Scoring iterations: 5
Diperoleh model sebagai berikut:
\[P(Y_i≤1|X_i)=\frac{e^{-0.110848+0.448306(AB=2)+1.185640(AB=3)+1.804899(AB=4)-0.039967D+0.899850(SB=5)+1.312042(PC=4)}}{1+e^{-0.110848+0.448306(AB=2)+1.185640(AB=3)+1.804899(AB=4)-0.039967D+0.899850(SB=5)+1.312042(PC=4)}}\]
#null_model <- glm(Creditability~1,data=train_credit,family = "binomial")
null_model <- glm(creditability~1,data=train_credit,family = "binomial")
performance::test_lrt(null_model,logreg4)
## # Likelihood-Ratio-Test (LRT) for Model Comparison (ML-estimator)
##
## Name | Model | df | df_diff | Chi2 | p
## ---------------------------------------------------
## null_model | glm | 1 | | |
## logreg4 | glm | 13 | 12 | 189.09 | < .001
Diperoleh nilai peluang kurang dari taraf nyata 5% pada pengujian Likelihood-Ratio Test, dimana:
H0: Tidak ada peningkatan signifikan antara model yang dibentuk dengan model keseluruhan H1: Ada peningkatan signifikan antara model yang dibentuk dengan model keseluruhan
dan diperoleh nilai peluang lebih kecil dari taraf nyata 5%, maka H0 ditolak.
mp_logreg4 <- parameters::parameters(logreg4,exponentiate = TRUE)
parameters::display(mp_logreg4)
| Parameter | Odds Ratio | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | 1.12 | 0.55 | (0.42, 2.91) | 0.23 | 0.821 |
| account balance (2) | 1.57 | 0.34 | (1.02, 2.40) | 2.07 | 0.039 |
| account balance (3) | 3.27 | 1.29 | (1.56, 7.43) | 3.00 | 0.003 |
| account balance (4) | 6.08 | 1.47 | (3.82, 9.87) | 7.46 | < .001 |
| duration | 0.96 | 6.92e-03 | (0.95, 0.97) | -5.55 | < .001 |
| saving balance (2) | 1.07 | 0.31 | (0.61, 1.90) | 0.22 | 0.826 |
| saving balance (3) | 1.61 | 0.68 | (0.73, 3.92) | 1.13 | 0.259 |
| saving balance (4) | 2.34 | 1.23 | (0.90, 7.35) | 1.62 | 0.106 |
| saving balance (5) | 2.46 | 0.67 | (1.47, 4.25) | 3.32 | < .001 |
| previous credit (1) | 0.63 | 0.36 | (0.20, 1.98) | -0.79 | 0.428 |
| previous credit (2) | 1.74 | 0.78 | (0.73, 4.31) | 1.23 | 0.220 |
| previous credit (3) | 1.81 | 0.92 | (0.68, 5.00) | 1.17 | 0.242 |
| previous credit (4) | 3.71 | 1.77 | (1.48, 9.67) | 2.76 | 0.006 |
# report::report(logreg4)
s <- summary(logreg4)
coeff.sig <- s$coefficients[,1][s$coefficients[,4] < 0.05]
odds <- round(exp(coeff.sig),2)
odds
## account.balance2 account.balance3 account.balance4 duration
## 1.57 3.27 6.08 0.96
## saving.balance5 previous.credit4
## 2.46 3.71
Diperoleh odds ratio atau rasio kecenderungan untuk masing-masing peubah penjelas yang signifikan sebagai berikut:
Untuk peubah account.balance (saldo akun), didapatkan nilai OR sebesar 1.57 untuk kategori 2 (Account Balance = 0 Deutsch Mark). Artinya, peminjam/nasabah memiliki kecenderungan untuk masuk dalam klasifikasi kredit buruk (1) sebesar 1.57 kali lebih besar jika memiliki saldo akun sebesar 0 DM dibandingkan dengan yang tidak memiliki akun. Kemudian, didapatkan nilai OR sebesar 3.27 untuk kategori 3 (Account Balance = antara 0 - 200 DM) yang artinya, nasabah memiliki kecenderungan untuk masuk dalam klasifikasi kredit buruk (1) sebesar 3.27 kali lebih besar jika memiliki saldo akun antara 0 hingga 200 DM. Dan terakhir, untuk peubah yang sama pada kategori 4 (Account Balance > 200 DM) memiliki nilai OR sebesar 6.08, artinya nasabah memiliki kecenderungan untuk masuk dalam klasifikasi kredit buruk (1) sebesar 6.08 kali lebih besar jika memiliki saldo akun di atas 200 DM.
Untuk peubah duration (durasi kredit), diperoleh nilai OR sebesar 0.96 yang artinya jika nasaban menambah durasi kredit sebanyak 1 tahun, maka ia cenderung untuk tergolong dalam klasifikasi kredit buruk sebesar 0.96 kali dibandingkan durasi sebelumnya. Hal ini mengindikasikan bahwa semakin lama durasi nasabah mengajukan kredit, kecenderungan ia tergolong dalam kredit buruk akan semakin menurun.
Untuk peubah saving.balance (saldo tabungan), didapatkan nilai OR sebesar 2.46 untuk kategori 5 (Saving Balance > 1000 DM). Artinya, peminjam/nasabah memiliki kecenderungan untuk masuk dalam klasifikasi kredit buruk (1) sebesar 2.46 kali lebih besar jika memiliki saldo tabungan > 1000 DM dibandingkan dengan yang saldonya tidak diketahui. Hal ini mengindikasikan bahwa tabungan yang besar justru menarik nasabah untuk meminjam.
Diperoleh nilai OR untuk peubah penjelas previous.credit dengan kategori 4 (Previous Credit = Critical) sebesar 3.71, artinya nasabah dengan status kredit sebelumnya kritis (tidak membayar) akan berpotensi 3.71 kali lebih besar daripada nasabah yang status kredit sebelumnya membayar tepat waktu untuk tergolong ke dalam status kredit buruk.
Anova Test
anova(logreg4,test = "Chisq") %>%
parameters::parameters() %>%
parameters::display()
| Parameter | df | Deviance | df (error) | Deviance (error) | p |
|---|---|---|---|---|---|
| NULL | 799 | 977.38 | |||
| account.balance | 3 | 109.93 | 796 | 867.46 | < .001 |
| duration | 1 | 39.72 | 795 | 827.74 | < .001 |
| saving.balance | 4 | 14.46 | 791 | 813.28 | 0.006 |
| previous.credit | 4 | 24.98 | 787 | 788.29 | < .001 |
Anova Table (Type 1 tests)
Diperoleh nilai peluang yang signifikan untuk semua peubah penjelas dalam model, artinya peubah-peubah ini secara signifikan mampu menjelaskan status kredit nasabah.
qplot(predict(logreg4,type = "link"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#qplot(train_credit$Credit.Amount,predict(logreg,type = "link"))
#qplot(train_credit$Credit.Amount,predict(logreg,type = "response"))
qplot(train_credit$credit.amount,predict(logreg4,type = "link"))
qplot(train_credit$credit.amount,predict(logreg4,type = "response"))
prob_test <- predict(logreg4,type = "response",newdata = test_credit)
pred_class1_test <- as.factor(ifelse(prob_test<=0.5,0,1))
pred_class2_test <- as.factor(ifelse(prob_test<=0.55,0,1))
pred_class3_test <- as.factor(ifelse(prob_test<=0.4,0,1))
#caret::confusionMatrix(pred_class1_test,test_credit$Creditability)
#caret::confusionMatrix(pred_class2_test,test_credit$Creditability)
#caret::confusionMatrix(pred_class3_test,test_credit$Creditability)
caret::confusionMatrix(pred_class1_test,test_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 19 12
## 1 41 128
##
## Accuracy : 0.735
## 95% CI : (0.6681, 0.7948)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.15789
##
## Kappa : 0.268
##
## Mcnemar's Test P-Value : 0.00012
##
## Sensitivity : 0.3167
## Specificity : 0.9143
## Pos Pred Value : 0.6129
## Neg Pred Value : 0.7574
## Prevalence : 0.3000
## Detection Rate : 0.0950
## Detection Prevalence : 0.1550
## Balanced Accuracy : 0.6155
##
## 'Positive' Class : 0
##
caret::confusionMatrix(pred_class2_test,test_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 25 22
## 1 35 118
##
## Accuracy : 0.715
## 95% CI : (0.6471, 0.7764)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.3532
##
## Kappa : 0.2766
##
## Mcnemar's Test P-Value : 0.1120
##
## Sensitivity : 0.4167
## Specificity : 0.8429
## Pos Pred Value : 0.5319
## Neg Pred Value : 0.7712
## Prevalence : 0.3000
## Detection Rate : 0.1250
## Detection Prevalence : 0.2350
## Balanced Accuracy : 0.6298
##
## 'Positive' Class : 0
##
caret::confusionMatrix(pred_class3_test,test_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 8 7
## 1 52 133
##
## Accuracy : 0.705
## 95% CI : (0.6366, 0.7672)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.4733
##
## Kappa : 0.1061
##
## Mcnemar's Test P-Value : 1.014e-08
##
## Sensitivity : 0.1333
## Specificity : 0.9500
## Pos Pred Value : 0.5333
## Neg Pred Value : 0.7189
## Prevalence : 0.3000
## Detection Rate : 0.0400
## Detection Prevalence : 0.0750
## Balanced Accuracy : 0.5417
##
## 'Positive' Class : 0
##
#roc_empirical_test <- ROCit::rocit(prob_test,test_credit$Creditability,negref = 0)
roc_empirical_test <- ROCit::rocit(prob_test,test_credit$creditability,negref = 0)
get_output_test <- plot(roc_empirical_test)
get_output_test$`optimal Youden Index point`
## value FPR TPR cutoff
## 0.4261905 0.2166667 0.6428571 0.7371052
pred_class4_test <- as.factor(ifelse(prob_test<=get_output_test$`optimal Youden Index point`["cutoff"],0,1))
#caret::confusionMatrix(pred_class1_test,test_credit$Creditability)
#caret::confusionMatrix(pred_class4_test,test_credit$Creditability)
caret::confusionMatrix(pred_class1_test,test_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 19 12
## 1 41 128
##
## Accuracy : 0.735
## 95% CI : (0.6681, 0.7948)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.15789
##
## Kappa : 0.268
##
## Mcnemar's Test P-Value : 0.00012
##
## Sensitivity : 0.3167
## Specificity : 0.9143
## Pos Pred Value : 0.6129
## Neg Pred Value : 0.7574
## Prevalence : 0.3000
## Detection Rate : 0.0950
## Detection Prevalence : 0.1550
## Balanced Accuracy : 0.6155
##
## 'Positive' Class : 0
##
caret::confusionMatrix(pred_class4_test,test_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 47 51
## 1 13 89
##
## Accuracy : 0.68
## 95% CI : (0.6105, 0.744)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.7579
##
## Kappa : 0.3548
##
## Mcnemar's Test P-Value : 3.746e-06
##
## Sensitivity : 0.7833
## Specificity : 0.6357
## Pos Pred Value : 0.4796
## Neg Pred Value : 0.8725
## Prevalence : 0.3000
## Detection Rate : 0.2350
## Detection Prevalence : 0.4900
## Balanced Accuracy : 0.7095
##
## 'Positive' Class : 0
##
prob <- predict(logreg4,type = "response")
pred_class1 <- as.factor(ifelse(prob<=0.5,0,1))
pred_class2 <- as.factor(ifelse(prob<=0.55,0,1))
pred_class3 <- as.factor(ifelse(prob<=0.4,0,1))
#caret::confusionMatrix(pred_class1,train_credit$Creditability)
#caret::confusionMatrix(pred_class2,train_credit$Creditability)
#caret::confusionMatrix(pred_class3,train_credit$Creditability)
caret::confusionMatrix(pred_class1,train_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 113 55
## 1 127 505
##
## Accuracy : 0.7725
## 95% CI : (0.7418, 0.8011)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 2.700e-06
##
## Kappa : 0.4076
##
## Mcnemar's Test P-Value : 1.418e-07
##
## Sensitivity : 0.4708
## Specificity : 0.9018
## Pos Pred Value : 0.6726
## Neg Pred Value : 0.7991
## Prevalence : 0.3000
## Detection Rate : 0.1412
## Detection Prevalence : 0.2100
## Balanced Accuracy : 0.6863
##
## 'Positive' Class : 0
##
caret::confusionMatrix(pred_class2,train_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 137 87
## 1 103 473
##
## Accuracy : 0.7625
## 95% CI : (0.7314, 0.7916)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 4.831e-05
##
## Kappa : 0.4235
##
## Mcnemar's Test P-Value : 0.2765
##
## Sensitivity : 0.5708
## Specificity : 0.8446
## Pos Pred Value : 0.6116
## Neg Pred Value : 0.8212
## Prevalence : 0.3000
## Detection Rate : 0.1713
## Detection Prevalence : 0.2800
## Balanced Accuracy : 0.7077
##
## 'Positive' Class : 0
##
caret::confusionMatrix(pred_class3,train_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 64 22
## 1 176 538
##
## Accuracy : 0.7525
## 95% CI : (0.7211, 0.7821)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.0005669
##
## Kappa : 0.2784
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.2667
## Specificity : 0.9607
## Pos Pred Value : 0.7442
## Neg Pred Value : 0.7535
## Prevalence : 0.3000
## Detection Rate : 0.0800
## Detection Prevalence : 0.1075
## Balanced Accuracy : 0.6137
##
## 'Positive' Class : 0
##
#roc_empirical <- ROCit::rocit(prob,train_credit$Creditability,negref = 0)
roc_empirical <- ROCit::rocit(prob,train_credit$creditability,negref = 0)
get_output <- plot(roc_empirical)
get_output$`optimal Youden Index point`
## value FPR TPR cutoff
## 0.4613095 0.2833333 0.7446429 0.6735371
pred_class4 <- as.factor(ifelse(prob<=get_output$`optimal Youden Index point`["cutoff"],0,1))
#caret::confusionMatrix(pred_class1,train_credit$Creditability)
#caret::confusionMatrix(pred_class4,train_credit$Creditability)
caret::confusionMatrix(pred_class1,train_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 113 55
## 1 127 505
##
## Accuracy : 0.7725
## 95% CI : (0.7418, 0.8011)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 2.700e-06
##
## Kappa : 0.4076
##
## Mcnemar's Test P-Value : 1.418e-07
##
## Sensitivity : 0.4708
## Specificity : 0.9018
## Pos Pred Value : 0.6726
## Neg Pred Value : 0.7991
## Prevalence : 0.3000
## Detection Rate : 0.1412
## Detection Prevalence : 0.2100
## Balanced Accuracy : 0.6863
##
## 'Positive' Class : 0
##
caret::confusionMatrix(pred_class4,train_credit$creditability)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 172 144
## 1 68 416
##
## Accuracy : 0.735
## 95% CI : (0.703, 0.7653)
## No Information Rate : 0.7
## P-Value [Acc > NIR] : 0.01613
##
## Kappa : 0.4214
##
## Mcnemar's Test P-Value : 2.591e-07
##
## Sensitivity : 0.7167
## Specificity : 0.7429
## Pos Pred Value : 0.5443
## Neg Pred Value : 0.8595
## Prevalence : 0.3000
## Detection Rate : 0.2150
## Detection Prevalence : 0.3950
## Balanced Accuracy : 0.7298
##
## 'Positive' Class : 0
##
Diperoleh nilai akurasi tertinggi sebesar 77%.