df <- scorecard::germancredit
df %>% glimpse()
## Observations: 1,000
## Variables: 21
## $ status.of.existing.checking.account <fct> ... <...
## $ duration.in.month <dbl> 6, 48...
## $ credit.history <fct> criti...
## $ purpose <chr> "radi...
## $ credit.amount <dbl> 1169,...
## $ savings.account.and.bonds <fct> unkno...
## $ present.employment.since <fct> ... >...
## $ installment.rate.in.percentage.of.disposable.income <dbl> 4, 2,...
## $ personal.status.and.sex <fct> male ...
## $ other.debtors.or.guarantors <fct> none,...
## $ present.residence.since <dbl> 4, 2,...
## $ property <fct> real ...
## $ age.in.years <dbl> 67, 2...
## $ other.installment.plans <fct> none,...
## $ housing <fct> own, ...
## $ number.of.existing.credits.at.this.bank <dbl> 2, 1,...
## $ job <fct> skill...
## $ number.of.people.being.liable.to.provide.maintenance.for <dbl> 1, 1,...
## $ telephone <fct> yes, ...
## $ foreign.worker <fct> yes, ...
## $ creditability <fct> good,...
pROC
Syntax (response, predictor)
fit <- glm(creditability ~ ., family = binomial(link = "logit"), data = df)
auc(fit$y, fit$fitted.values)
## Area under the curve: 0.8336
\[ AUC_1 = \frac{U_1} {n_1n_2}\]
# https://blog.revolutionanalytics.com/2017/03/auc-meets-u-stat.html
auc_wmw <- function(labels, scores){
labels <- as.logical(labels)
pos <- scores[labels]
neg <- scores[!labels]
U <- as.numeric(wilcox.test(pos, neg)$statistic)
U/(length(pos) * length(neg))
}
auc_wmw(fit$y, fit$fitted.values)
## [1] 0.8335714
\[ U_1 = R_1 - \frac{n_1(n_1 + 1)} {2} \]
auc_wmw2 <- function(labels, scores){
labels <- as.logical(labels)
n1 <- sum(labels)
n2 <- sum(!labels)
R1 <- sum(rank(scores)[labels])
U1 <- R1 - n1 * (n1 + 1)/2
U1/(n1 * n2)
}
auc_wmw2(fit$y, fit$fitted.values)
## [1] 0.8335714