Performance of models

Test set evaluation: AUC, Gini, confidence intervals, DeLong test

library(data.table)
library(glmnet)
library(xgboost)
library(pROC)
library(knitr)
library(ggplot2)
library(kableExtra)


OUTPUT_DIR <- "/Users/amalianimeskern/Library/CloudStorage/OneDrive-ErasmusUniversityRotterdam/Freddie Mac Data"

# --- Load models and test data ---
logistic_model <- readRDS(file.path(OUTPUT_DIR, "logistic_model.rds"))
best_lambda    <- readRDS(file.path(OUTPUT_DIR, "best_lambda.rds"))
xgb_model      <- readRDS(file.path(OUTPUT_DIR, "xgb_model.rds"))

test_woe <- readRDS(file.path(OUTPUT_DIR, "test_woe.rds"))
test_xgb <- readRDS(file.path(OUTPUT_DIR, "test_xgb.rds"))

# --- Test matrices ---
woe_features <- setdiff(names(test_woe),
                        c("loan_sequence_number", "monthly_reporting_period",
                          "default_next_12m"))
X_test_woe <- as.matrix(test_woe[, ..woe_features])
y_test     <- test_woe$default_next_12m

xgb_features <- setdiff(names(test_xgb),
                        c("loan_sequence_number", "monthly_reporting_period",
                          "default_next_12m"))
dtest <- xgb.DMatrix(data = as.matrix(test_xgb[, ..xgb_features]),
                     label = test_xgb$default_next_12m)

# --- Predictions ---
preds_logistic <- predict(logistic_model, newx = X_test_woe, s = best_lambda,
                          type = "response")[, 1]
preds_xgb      <- predict(xgb_model, dtest)

# --- ROC curves ---
roc_logistic <- roc(y_test, preds_logistic, quiet = TRUE)
roc_xgb      <- roc(y_test, preds_xgb, quiet = TRUE)

# --- AUC with 95% confidence intervals ---
ci_logistic <- ci.auc(roc_logistic, conf.level = 0.95)
ci_xgb      <- ci.auc(roc_xgb, conf.level = 0.95)

# --- Results table ---
results <- data.table(
  Model      = c("Logistic Regression", "XGBoost"),
  AUC        = round(c(auc(roc_logistic), auc(roc_xgb)), 4),
  AUC_lower  = round(c(ci_logistic[1], ci_xgb[1]), 4),
  AUC_upper  = round(c(ci_logistic[3], ci_xgb[3]), 4),
  Gini       = round(c(2 * auc(roc_logistic) - 1, 2 * auc(roc_xgb) - 1), 4)
)
results
##                  Model    AUC AUC_lower AUC_upper   Gini
##                 <char>  <num>     <num>     <num>  <num>
## 1: Logistic Regression 0.8103    0.8042    0.8164 0.6207
## 2:             XGBoost 0.8499    0.8444    0.8554 0.6997
# --- DeLong test ---
delong <- roc.test(roc_logistic, roc_xgb, method = "delong")
delong
## 
##  DeLong's test for two correlated ROC curves
## 
## data:  roc_logistic and roc_xgb
## Z = -20.865, p-value < 2.2e-16
## alternative hypothesis: true difference in AUC is not equal to 0
## 95 percent confidence interval:
##  -0.04323045 -0.03580600
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.8103406   0.8498588
# --- Save ---
saveRDS(results, file.path(OUTPUT_DIR, "performance_results.rds"))
saveRDS(delong,  file.path(OUTPUT_DIR, "delong_test.rds"))
saveRDS(list(logistic = preds_logistic, xgb = preds_xgb),
        file.path(OUTPUT_DIR, "test_predictions.rds"))


# --- Performance table ---
perf_table <- data.table(
  Metric = c("AUC", "95% CI (AUC)", "Gini",
             "DeLong test (Z)", "p-value", "95% CI (AUC difference)"),
  `Logistic Regression` = c(
    as.character(results$AUC[1]),
    paste0("[", results$AUC_lower[1], ", ", results$AUC_upper[1], "]"),
    as.character(results$Gini[1]),
    "", "", ""
  ),
  XGBoost = c(
    as.character(results$AUC[2]),
    paste0("[", results$AUC_lower[2], ", ", results$AUC_upper[2], "]"),
    as.character(results$Gini[2]),
    round(delong$statistic, 3),
    "< 0.001",
    paste0("[", round(abs(delong$conf.int[2]), 3), ", ",
           round(abs(delong$conf.int[1]), 3), "]")
  )
)

kable(perf_table, format = "html",
      col.names = c("Metric", "Logistic Regression", "XGBoost"))
Metric Logistic Regression XGBoost
AUC 0.8103 0.8499
95% CI (AUC) [0.8042, 0.8164] [0.8444, 0.8554]
Gini 0.6207 0.6997
DeLong test (Z) -20.865
p-value < 0.001
95% CI (AUC difference) [0.036, 0.043]
kbl <- kable(perf_table, format = "html",
             col.names = c("Metric", "Logistic Regression", "XGBoost"))
writeLines(kbl, file.path(OUTPUT_DIR, "performance_table.html"))

# --- ROC plot ---

roc_dt <- rbind(
  data.table(fpr = 1 - roc_logistic$specificities,
             tpr = roc_logistic$sensitivities,
             model = paste0("Logistic Regression (AUC = ", results$AUC[1], ")")),
  data.table(fpr = 1 - roc_xgb$specificities,
             tpr = roc_xgb$sensitivities,
             model = paste0("XGBoost (AUC = ", results$AUC[2], ")"))
)

p_roc <- ggplot(roc_dt, aes(x = fpr, y = tpr, color = model)) +
  geom_line(linewidth = 0.8) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey50") +
  scale_color_manual(values = c("#1976D2", "#D32F2F")) +
  labs(x = "False Positive Rate", y = "True Positive Rate",
       title = "ROC Curves: Logistic Regression vs XGBoost",
       color = NULL) +
  theme_classic() +
  theme(plot.title = element_text(size = 13, face = "bold"),
        legend.position = c(0.7, 0.25),
        legend.background = element_rect(fill = "white", color = "grey80"))
p_roc <- ggplot(roc_dt, aes(x = fpr, y = tpr, color = model)) +
  geom_line(linewidth = 0.8) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey50") +
  scale_color_manual(values = c("#1976D2", "#D32F2F")) +
  labs(x = "False Positive Rate", y = "True Positive Rate",
       title = "ROC Curves: Logistic Regression vs XGBoost",
       color = NULL) +
  theme_classic() +
  theme(plot.title = element_text(size = 13, face = "bold"),
        legend.position = c(0.7, 0.25),
        legend.background = element_rect(fill = "white", color = "grey80"))

p_roc

ggsave(file.path(OUTPUT_DIR, "roc_curves.pdf"), p_roc, width = 8, height = 7)

# --- Hyperparameters ---

best_params <- readRDS(file.path(OUTPUT_DIR, "best_hyperparams.rds"))
best_lambda <- readRDS(file.path(OUTPUT_DIR, "best_lambda.rds"))

hyperparams_table <- data.table(
  Model = c("XGBoost", "XGBoost", "XGBoost", "XGBoost", "XGBoost",
            "Logistic Regression"),
  Hyperparameter = c("Learning rate (η)", "Max tree depth", "Subsample",
                     "Column subsample", "Number of boosting rounds",
                     "Ridge penalty (λ)"),
  Value = c(format(round(best_params$eta, 4), scientific = FALSE),
            format(best_params$max_depth, scientific = FALSE),
            format(round(best_params$subsample, 4), scientific = FALSE),
            format(round(best_params$colsample_bytree, 4), scientific = FALSE),
            format(xgb_model$niter, scientific = FALSE),
            format(round(best_lambda, 6), scientific = FALSE))
)

kbl_hp <- kable(hyperparams_table, format = "html",
                col.names = c("Model", "Hyperparameter", "Value"),
                caption = "Selected Hyperparameters")
writeLines(kbl_hp, file.path(OUTPUT_DIR, "hyperparameters_table.html"))