Feedforward neural network model

Tuned on validation AUC

One hot encoded categorical variables (same set as XGBoost Model)

library(data.table)
library(keras3)
library(pROC)

OUTPUT_DIR <- "/Users/amalianimeskern/Library/CloudStorage/OneDrive-ErasmusUniversityRotterdam/Freddie Mac Data"

set.seed(123)
tensorflow::tf$random$set_seed(123L)

# --- load data ---

train_xgb <- readRDS(file.path(OUTPUT_DIR, "train_xgb.rds"))
valid_xgb <- readRDS(file.path(OUTPUT_DIR, "valid_xgb.rds"))
test_xgb  <- readRDS(file.path(OUTPUT_DIR, "test_xgb.rds"))

target <- "default_next_12m"

drop_cols <- c(
  "loan_sequence_number",
  "monthly_reporting_period",
  target
)

feature_cols <- setdiff(names(train_xgb), drop_cols)

# --- preprocessing: mean imputation and standardisation ---

train_means <- sapply(train_xgb[, ..feature_cols], function(x) mean(x, na.rm = TRUE))
train_sds   <- sapply(train_xgb[, ..feature_cols], function(x) sd(x, na.rm = TRUE))

train_sds[train_sds < 1e-8] <- 1

prepare_nn_data <- function(dt, feature_cols, target, train_means, train_sds) {
  
  X <- as.matrix(dt[, ..feature_cols])
  y <- dt[[target]]
  
  for (j in seq_along(feature_cols)) {
    na_idx <- is.na(X[, j])
    if (any(na_idx)) {
      X[na_idx, j] <- train_means[j]
    }
  }
  
  X <- sweep(X, 2, train_means, "-")
  X <- sweep(X, 2, train_sds, "/")
  
  list(X = X, y = y)
}

train_prep <- prepare_nn_data(train_xgb, feature_cols, target, train_means, train_sds)
valid_prep <- prepare_nn_data(valid_xgb, feature_cols, target, train_means, train_sds)
test_prep  <- prepare_nn_data(test_xgb,  feature_cols, target, train_means, train_sds)

rm(train_xgb, valid_xgb, test_xgb)
gc()

n_features <- length(feature_cols)

# --- model-building function ---

build_nn_model <- function(n_features,
                           units1,
                           units2,
                           units3,
                           dropout1,
                           dropout2,
                           dropout3,
                           learning_rate) {
  
  model <- keras_model_sequential(input_shape = n_features) |>
    layer_batch_normalization() |>
    layer_dense(units = units1, activation = "relu") |>
    layer_batch_normalization() |>
    layer_dropout(rate = dropout1) |>
    layer_dense(units = units2, activation = "relu") |>
    layer_batch_normalization() |>
    layer_dropout(rate = dropout2) |>
    layer_dense(units = units3, activation = "relu") |>
    layer_batch_normalization() |>
    layer_dropout(rate = dropout3) |>
    layer_dense(units = 1, activation = "sigmoid")
  
  model |> compile(
    optimizer = optimizer_adam(learning_rate = learning_rate),
    loss      = "binary_crossentropy",
    metrics   = list(metric_auc(name = "auc"))
  )
  
  model
}

# --- hyperparameter grid ---

param_grid <- expand.grid(
  architecture  = c("64-32-16", "128-64-32"),
  dropout_base  = c(0.2, 0.3),
  learning_rate = c(1e-3, 5e-4),
  batch_size    = c(4096),
  stringsAsFactors = FALSE
)

print(param_grid)

# --- hyperparameter tuning ---

results <- data.table()

best_auc <- -Inf
best_model <- NULL
best_config <- NULL

for (i in seq_len(nrow(param_grid))) {
  
  config <- param_grid[i, ]
  
  arch <- as.integer(strsplit(config$architecture, "-")[[1]])
  
  dropout1 <- config$dropout_base
  dropout2 <- max(config$dropout_base - 0.1, 0)
  dropout3 <- max(config$dropout_base - 0.2, 0)
  
  model <- build_nn_model(
    n_features    = n_features,
    units1        = arch[1],
    units2        = arch[2],
    units3        = arch[3],
    dropout1      = dropout1,
    dropout2      = dropout2,
    dropout3      = dropout3,
    learning_rate = config$learning_rate
  )
  
  callbacks <- list(
    callback_early_stopping(
      monitor = "val_auc",
      mode = "max",
      patience = 5,
      min_delta = 0.0005,
      restore_best_weights = TRUE
    ),
    callback_reduce_lr_on_plateau(
      monitor = "val_auc",
      mode = "max",
      factor = 0.5,
      patience = 2
    )
  )
  
  history <- model |> fit(
    x = train_prep$X,
    y = train_prep$y,
    validation_data = list(valid_prep$X, valid_prep$y),
    epochs = 50,
    batch_size = config$batch_size,
    callbacks = callbacks,
    verbose = 1
  )
  
  valid_probs <- as.numeric(model |> predict(valid_prep$X, verbose = 0))
  valid_auc <- as.numeric(auc(roc(valid_prep$y, valid_probs, quiet = TRUE)))
  
  result_i <- data.table(
    model_id       = i,
    architecture   = config$architecture,
    dropout_base   = config$dropout_base,
    dropout1       = dropout1,
    dropout2       = dropout2,
    dropout3       = dropout3,
    learning_rate  = config$learning_rate,
    batch_size     = config$batch_size,
    validation_auc = valid_auc
  )
  
  results <- rbind(results, result_i)
  
  print(result_i)
  
  if (valid_auc > best_auc) {
    best_auc <- valid_auc
    best_model <- model
    best_config <- result_i
  }
  
  gc()
}

# --- tuning results ---

results <- results[order(-validation_auc)]

print(results)
print(best_config)

# --- test set evaluation ---

nn_probs  <- as.numeric(best_model |> predict(test_prep$X, verbose = 0))
nn_labels <- test_prep$y

roc_nn  <- roc(nn_labels, nn_probs, quiet = TRUE)
auc_nn  <- as.numeric(auc(roc_nn))
ci_nn   <- ci.auc(roc_nn)
gini_nn <- 2 * auc_nn - 1

print(round(auc_nn, 4))
print(round(ci_nn[c(1, 3)], 4))
print(round(gini_nn, 4))

# --- save ---

saveRDS(results, file.path(OUTPUT_DIR, "nn_tuning_results.rds"))
saveRDS(best_config, file.path(OUTPUT_DIR, "nn_best_config.rds"))
saveRDS(nn_probs, file.path(OUTPUT_DIR, "nn_test_probs_tuned.rds"))

best_model |> save_model(file.path(OUTPUT_DIR, "nn_model_tuned.keras"))

# --- test set evaluation ---

nn_probs  <- as.numeric(best_model |> predict(test_prep$X, verbose = 0))
nn_labels <- test_prep$y

roc_nn  <- roc(nn_labels, nn_probs, quiet = TRUE)
auc_nn  <- as.numeric(auc(roc_nn))
ci_nn   <- ci.auc(roc_nn)
gini_nn <- 2 * auc_nn - 1

print(round(auc_nn, 4))
print

# --- save ---

saveRDS(results, file.path(OUTPUT_DIR, "nn_tuning_results.rds"))
saveRDS(best_config, file.path(OUTPUT_DIR, "nn_best_config.rds"))
saveRDS(nn_probs, file.path(OUTPUT_DIR, "nn_test_probs_tuned.rds"))

best_model |> save_model(file.path(OUTPUT_DIR, "nn_model_tuned.keras"))