Test set evaluation: AUC, Gini, confidence intervals, DeLong
test
library(data.table)
library(glmnet)
library(xgboost)
library(pROC)
library(knitr)
library(ggplot2)
library(kableExtra)
OUTPUT_DIR <- "/Users/amalianimeskern/Library/CloudStorage/OneDrive-ErasmusUniversityRotterdam/Freddie Mac Data"
# --- load models and test data ---
logistic_model <- readRDS(file.path(OUTPUT_DIR, "logistic_model.rds"))
best_lambda <- readRDS(file.path(OUTPUT_DIR, "best_lambda.rds"))
xgb_model <- readRDS(file.path(OUTPUT_DIR, "xgb_model.rds"))
test_woe <- readRDS(file.path(OUTPUT_DIR, "test_woe.rds"))
test_xgb <- readRDS(file.path(OUTPUT_DIR, "test_xgb.rds"))
# --- test matrices ---
woe_features <- setdiff(names(test_woe),
c("loan_sequence_number", "monthly_reporting_period",
"default_next_12m"))
X_test_woe <- as.matrix(test_woe[, ..woe_features])
y_test <- test_woe$default_next_12m
xgb_features <- setdiff(names(test_xgb),
c("loan_sequence_number", "monthly_reporting_period",
"default_next_12m"))
dtest <- xgb.DMatrix(data = as.matrix(test_xgb[, ..xgb_features]),
label = test_xgb$default_next_12m)
# --- predictions ---
preds_logistic <- predict(logistic_model, newx = X_test_woe, s = best_lambda,
type = "response")[, 1]
preds_xgb <- predict(xgb_model, dtest)
preds_nn <- readRDS(file.path(OUTPUT_DIR, "nn_test_probs_tuned.rds"))
# --- ROC curves ---
roc_logistic <- roc(y_test, preds_logistic, quiet = TRUE)
roc_xgb <- roc(y_test, preds_xgb, quiet = TRUE)
roc_nn <- roc(y_test, preds_nn, quiet = TRUE)
# --- AUC with 95% confidence intervals ---
ci_logistic <- ci.auc(roc_logistic, conf.level = 0.95)
ci_xgb <- ci.auc(roc_xgb, conf.level = 0.95)
ci_nn <- ci.auc(roc_nn, conf.level = 0.95)
# --- results table ---
results <- data.table(
Model = c("Logistic Regression", "XGBoost", "Neural Network"),
AUC = round(c(auc(roc_logistic), auc(roc_xgb), auc(roc_nn)), 4),
AUC_lower = round(c(ci_logistic[1], ci_xgb[1], ci_nn[1]), 4),
AUC_upper = round(c(ci_logistic[3], ci_xgb[3], ci_nn[3]), 4),
Gini = round(c(2 * auc(roc_logistic) - 1,
2 * auc(roc_xgb) - 1,
2 * auc(roc_nn) - 1), 4)
)
results
## Model AUC AUC_lower AUC_upper Gini
## <char> <num> <num> <num> <num>
## 1: Logistic Regression 0.8103 0.8042 0.8164 0.6207
## 2: XGBoost 0.8499 0.8444 0.8554 0.6997
## 3: Neural Network 0.8270 0.8206 0.8333 0.6540
# --- DeLong tests ---
delong_xgb_lr <- roc.test(roc_xgb, roc_logistic, method = "delong")
delong_nn_lr <- roc.test(roc_nn, roc_logistic, method = "delong")
delong_nn_xgb <- roc.test(roc_nn, roc_xgb, method = "delong")
delong_xgb_lr
##
## DeLong's test for two correlated ROC curves
##
## data: roc_xgb and roc_logistic
## Z = 20.865, p-value < 2.2e-16
## alternative hypothesis: true difference in AUC is not equal to 0
## 95 percent confidence interval:
## 0.03580600 0.04323045
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.8498588 0.8103406
delong_nn_lr
##
## DeLong's test for two correlated ROC curves
##
## data: roc_nn and roc_logistic
## Z = 7.2559, p-value = 3.989e-13
## alternative hypothesis: true difference in AUC is not equal to 0
## 95 percent confidence interval:
## 0.01215355 0.02114929
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.8269920 0.8103406
delong_nn_xgb
##
## DeLong's test for two correlated ROC curves
##
## data: roc_nn and roc_xgb
## Z = -13.46, p-value < 2.2e-16
## alternative hypothesis: true difference in AUC is not equal to 0
## 95 percent confidence interval:
## -0.02619653 -0.01953708
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.8269920 0.8498588
# --- save ---
saveRDS(results, file.path(OUTPUT_DIR, "performance_results.rds"))
saveRDS(list(xgb_lr = delong_xgb_lr, nn_lr = delong_nn_lr, nn_xgb = delong_nn_xgb),
file.path(OUTPUT_DIR, "delong_tests.rds"))
saveRDS(list(logistic = preds_logistic, xgb = preds_xgb, nn = preds_nn),
file.path(OUTPUT_DIR, "test_predictions.rds"))
# --- performance table ---
fmt_p <- function(p) ifelse(p < 0.001, "< 0.001", as.character(round(p, 4)))
fmt_ci <- function(test) {
bounds <- round(test$conf.int, 3)
paste0("[", bounds[1], ", ", bounds[2], "]")
}
perf_table <- data.table(
Metric = c("AUC", "95% CI (AUC)", "Gini",
"DeLong vs LR (Z)", "p-value",
"95% CI (AUC diff vs LR)",
"DeLong vs XGBoost (Z)", "p-value",
"95% CI (AUC diff vs XGB)"),
`Logistic Regression` = c(
as.character(results$AUC[1]),
paste0("[", results$AUC_lower[1], ", ", results$AUC_upper[1], "]"),
as.character(results$Gini[1]),
"", "", "", "", "", ""
),
XGBoost = c(
as.character(results$AUC[2]),
paste0("[", results$AUC_lower[2], ", ", results$AUC_upper[2], "]"),
as.character(results$Gini[2]),
round(delong_xgb_lr$statistic, 3),
fmt_p(delong_xgb_lr$p.value),
fmt_ci(delong_xgb_lr),
"", "", ""
),
`Neural Network` = c(
as.character(results$AUC[3]),
paste0("[", results$AUC_lower[3], ", ", results$AUC_upper[3], "]"),
as.character(results$Gini[3]),
round(delong_nn_lr$statistic, 3),
fmt_p(delong_nn_lr$p.value),
fmt_ci(delong_nn_lr),
round(delong_nn_xgb$statistic, 3),
fmt_p(delong_nn_xgb$p.value),
fmt_ci(delong_nn_xgb)
)
)
kbl <- kable(perf_table, format = "html",
col.names = c("Metric", "Logistic Regression", "XGBoost", "Neural Network"))
writeLines(kbl, file.path(OUTPUT_DIR, "performance_table.html"))
# --- ROC plot ---
roc_dt <- rbind(
data.table(fpr = 1 - roc_logistic$specificities,
tpr = roc_logistic$sensitivities,
model = paste0("Logistic Regression (AUC = ", results$AUC[1], ")")),
data.table(fpr = 1 - roc_xgb$specificities,
tpr = roc_xgb$sensitivities,
model = paste0("XGBoost (AUC = ", results$AUC[2], ")")),
data.table(fpr = 1 - roc_nn$specificities,
tpr = roc_nn$sensitivities,
model = paste0("Neural Network (AUC = ", results$AUC[3], ")"))
)
p_roc <- ggplot(roc_dt, aes(x = fpr, y = tpr, color = model)) +
geom_line(linewidth = 0.8) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey50") +
scale_color_manual(values = c("#1976D2", "#D32F2F", "#388E3C")) +
labs(x = "False Positive Rate", y = "True Positive Rate",
title = "ROC Curves: Logistic Regression vs XGBoost vs Neural Network",
color = NULL) +
theme_classic() +
theme(plot.title = element_text(size = 13, face = "bold"),
legend.position = c(0.65, 0.25),
legend.background = element_rect(fill = "white", color = "grey80"))
ggsave(file.path(OUTPUT_DIR, "roc_curves.pdf"), p_roc, width = 8, height = 7)
# --- hyperparameters ---
best_params <- readRDS(file.path(OUTPUT_DIR, "best_hyperparams.rds"))
best_lambda <- readRDS(file.path(OUTPUT_DIR, "best_lambda.rds"))
nn_best_config <- readRDS(file.path(OUTPUT_DIR, "nn_best_config.rds"))
hyperparams_table <- data.table(
Model = c("XGBoost", "XGBoost", "XGBoost", "XGBoost", "XGBoost",
"Logistic Regression",
"Neural Network", "Neural Network", "Neural Network",
"Neural Network", "Neural Network", "Neural Network"),
Hyperparameter = c("Learning rate (η)", "Max tree depth", "Subsample",
"Column subsample", "Number of boosting rounds",
"Ridge penalty (λ)",
"Hidden layers", "Learning rate",
"Dropout rates", "Batch size",
"Maximum epochs", "Patience (early stopping)"),
Value = c(format(round(best_params$eta, 4), scientific = FALSE),
format(best_params$max_depth, scientific = FALSE),
format(round(best_params$subsample, 4), scientific = FALSE),
format(round(best_params$colsample_bytree, 4), scientific = FALSE),
format(xgb_model$niter, scientific = FALSE),
format(round(best_lambda, 6), scientific = FALSE),
gsub("-", " → ", nn_best_config$architecture),
format(nn_best_config$learning_rate, scientific = FALSE),
paste(nn_best_config$dropout1,
nn_best_config$dropout2,
nn_best_config$dropout3,
sep = " / "),
format(nn_best_config$batch_size, scientific = FALSE),
"50",
"5 epochs")
)
kbl_hp <- kable(
hyperparams_table,
format = "html",
col.names = c("Model", "Hyperparameter", "Value"),
caption = "Selected Hyperparameters"
)
writeLines(kbl_hp, file.path(OUTPUT_DIR, "hyperparameters_table.html"))