1 Dữ liệu là bộ germancredit

df <- scorecard::germancredit

df %>% glimpse()
## Observations: 1,000
## Variables: 21
## $ status.of.existing.checking.account                      <fct> ... <...
## $ duration.in.month                                        <dbl> 6, 48...
## $ credit.history                                           <fct> criti...
## $ purpose                                                  <chr> "radi...
## $ credit.amount                                            <dbl> 1169,...
## $ savings.account.and.bonds                                <fct> unkno...
## $ present.employment.since                                 <fct> ... >...
## $ installment.rate.in.percentage.of.disposable.income      <dbl> 4, 2,...
## $ personal.status.and.sex                                  <fct> male ...
## $ other.debtors.or.guarantors                              <fct> none,...
## $ present.residence.since                                  <dbl> 4, 2,...
## $ property                                                 <fct> real ...
## $ age.in.years                                             <dbl> 67, 2...
## $ other.installment.plans                                  <fct> none,...
## $ housing                                                  <fct> own, ...
## $ number.of.existing.credits.at.this.bank                  <dbl> 2, 1,...
## $ job                                                      <fct> skill...
## $ number.of.people.being.liable.to.provide.maintenance.for <dbl> 1, 1,...
## $ telephone                                                <fct> yes, ...
## $ foreign.worker                                           <fct> yes, ...
## $ creditability                                            <fct> good,...

2 Tính AUC dựa trên packages

  • pROC

  • Syntax (response, predictor)

fit <- glm(creditability ~ ., family = binomial(link = "logit"), data = df)

auc(fit$y, fit$fitted.values)
## Area under the curve: 0.8336

3 Dựa trên thống kê Wilcoxon-Mann-Whitney

  • Lấy sẵn công thức wilcox.test

\[ AUC_1 = \frac{U_1} {n_1n_2}\]

# https://blog.revolutionanalytics.com/2017/03/auc-meets-u-stat.html

auc_wmw <- function(labels, scores){
  labels <- as.logical(labels)
  pos <- scores[labels]
  neg <- scores[!labels]
  U <- as.numeric(wilcox.test(pos, neg)$statistic)
  U/(length(pos) * length(neg))
}

auc_wmw(fit$y, fit$fitted.values)
## [1] 0.8335714
  • Viết thống kê Wilcoxon-Mann-Whitney

\[ U_1 = R_1 - \frac{n_1(n_1 + 1)} {2} \]

auc_wmw2 <- function(labels, scores){
  labels <- as.logical(labels)
  n1 <- sum(labels)
  n2 <- sum(!labels)
  R1 <- sum(rank(scores)[labels])
  U1 <- R1 - n1 * (n1 + 1)/2
  U1/(n1 * n2)
}

auc_wmw2(fit$y, fit$fitted.values)
## [1] 0.8335714
LS0tDQp0aXRsZTogIkPDoWMgY8OhY2ggdMOtbmggdG/DoW4gQVVDIg0KYXV0aG9yOiAiTmd1eeG7hW4gTmfhu41jIELDrG5oIg0KZGF0ZTogIjMvMjMvMjAxOSINCm91dHB1dDogDQogIGh0bWxfZG9jdW1lbnQ6IA0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBudW1iZXJfc2VjdGlvbnM6IHllcw0KICAgIHRoZW1lOiAiZGVmYXVsdCINCiAgICB0b2M6IFRSVUUNCiAgICB0b2NfZmxvYXQ6IFRSVUUNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShwUk9DKQ0KYGBgDQoNCiMgROG7ryBsaeG7h3UgbMOgIGLhu5kgZ2VybWFuY3JlZGl0DQoNCmBgYHtyfQ0KZGYgPC0gc2NvcmVjYXJkOjpnZXJtYW5jcmVkaXQNCg0KZGYgJT4lIGdsaW1wc2UoKQ0KYGBgDQoNCiMgVMOtbmggQVVDIGThu7FhIHRyw6puIHBhY2thZ2VzIA0KDQotIHBST0MNCg0KLSBTeW50YXggKHJlc3BvbnNlLCBwcmVkaWN0b3IpDQoNCmBgYHtyfQ0KDQpmaXQgPC0gZ2xtKGNyZWRpdGFiaWxpdHkgfiAuLCBmYW1pbHkgPSBiaW5vbWlhbChsaW5rID0gImxvZ2l0IiksIGRhdGEgPSBkZikNCg0KYXVjKGZpdCR5LCBmaXQkZml0dGVkLnZhbHVlcykNCg0KYGBgDQoNCiMgROG7sWEgdHLDqm4gdGjhu5FuZyBrw6ogV2lsY294b24tTWFubi1XaGl0bmV5DQoNCi0gTOG6pXkgc+G6tW4gY8O0bmcgdGjhu6ljIHdpbGNveC50ZXN0DQoNCiQkIEFVQ18xID0gXGZyYWN7VV8xfSB7bl8xbl8yfSQkDQoNCmBgYHtyfQ0KIyBodHRwczovL2Jsb2cucmV2b2x1dGlvbmFuYWx5dGljcy5jb20vMjAxNy8wMy9hdWMtbWVldHMtdS1zdGF0Lmh0bWwNCg0KYXVjX3dtdyA8LSBmdW5jdGlvbihsYWJlbHMsIHNjb3Jlcyl7DQogIGxhYmVscyA8LSBhcy5sb2dpY2FsKGxhYmVscykNCiAgcG9zIDwtIHNjb3Jlc1tsYWJlbHNdDQogIG5lZyA8LSBzY29yZXNbIWxhYmVsc10NCiAgVSA8LSBhcy5udW1lcmljKHdpbGNveC50ZXN0KHBvcywgbmVnKSRzdGF0aXN0aWMpDQogIFUvKGxlbmd0aChwb3MpICogbGVuZ3RoKG5lZykpDQp9DQoNCmF1Y193bXcoZml0JHksIGZpdCRmaXR0ZWQudmFsdWVzKQ0KYGBgDQoNCi0gVmnhur90IHRo4buRbmcga8OqIFdpbGNveG9uLU1hbm4tV2hpdG5leQ0KDQokJCBVXzEgPSBSXzEgLSBcZnJhY3tuXzEobl8xICsgMSl9IHsyfSAkJA0KDQpgYGB7cn0NCmF1Y193bXcyIDwtIGZ1bmN0aW9uKGxhYmVscywgc2NvcmVzKXsNCiAgbGFiZWxzIDwtIGFzLmxvZ2ljYWwobGFiZWxzKQ0KICBuMSA8LSBzdW0obGFiZWxzKQ0KICBuMiA8LSBzdW0oIWxhYmVscykNCiAgUjEgPC0gc3VtKHJhbmsoc2NvcmVzKVtsYWJlbHNdKQ0KICBVMSA8LSBSMSAtIG4xICogKG4xICsgMSkvMg0KICBVMS8objEgKiBuMikNCn0NCg0KYXVjX3dtdzIoZml0JHksIGZpdCRmaXR0ZWQudmFsdWVzKQ0KYGBgDQoNCg0K