library(randomForest)
library(fastshap)
library(dplyr)
library(caret)
library(pROC)
library(PRROC)
library(prg)
library(tidyr)
library(patchwork)
library(cowplot)
library(ggplot2)
df_test <- read.csv("~/Downloads/read-ai/df_test_f_v2.csv")
df_external <- read.csv("~/Downloads/read-ai/df_external_f_v2.csv")
cat(sprintf("Study 1 (internal): n=%d IR=%d Non-IR=%d prevalence=%.3f\n",
nrow(df_test),
sum(df_test$IR_label == "IR"),
sum(df_test$IR_label == "Non-IR"),
mean(df_test$IR_label == "IR")))
## Study 1 (internal): n=97 IR=32 Non-IR=65 prevalence=0.330
cat(sprintf("Study 2 (external): n=%d IR=%d Non-IR=%d prevalence=%.3f\n",
nrow(df_external),
sum(df_external$IR_label == "IR"),
sum(df_external$IR_label == "Non-IR"),
mean(df_external$IR_label == "IR")))
## Study 2 (external): n=61 IR=19 Non-IR=42 prevalence=0.311
feature_sets <- list(
CGM_model = c(
"bmi", "mean_fasting_glucose", "sd_fasting_glucose",
"excursions_per_day", "rec_time_50pct_median",
"overall_max_glucose", "age", "whr"
),
Smartwatch_model = c(
"stress_overall_daily_mean_stress", "sd_hr_day_night_diff",
"age", "bmi", "whr"
),
Baseline_model = c("bmi", "age", "whr")
)
model_names <- c("Model CGM", "Model smartwatch", "Model baseline")
for (nm in names(feature_sets)) {
p <- length(feature_sets[[nm]])
cat(sprintf(" %-20s p = %d mtry = %d\n", nm, p, floor(sqrt(p))))
}
## CGM_model p = 8 mtry = 2
## Smartwatch_model p = 5 mtry = 2
## Baseline_model p = 3 mtry = 1
standardize <- function(df, model_name) {
df <- as.data.frame(df)
df$Model <- model_name
df
}
# AUPRG — Precision-Recall Gain curve (Flach & Kull, NeurIPS 2015).
# Random classifier → 0; perfect classifier → 1.
# install.packages("prg") if needed.
compute_auprg <- function(labels, scores) {
tryCatch({
curve <- prg::create_prg_curve(labels, scores)
prg::calc_auprg(curve)
}, error = function(e) NA_real_)
}
Trains an RF on the 70% training partition of each bootstrap split.
mtry = floor(sqrt(p)) throughout; upsampling applied within
each split. SHAP values are computed on each 30% holdout for internal
visualisation.
train_model_with_shap <- function(features, df, train_idx) {
X <- df %>%
dplyr::select(all_of(features)) %>%
mutate(across(where(is.numeric), as.double))
y <- factor(trimws(df$IR_label))
levels(y) <- make.names(levels(y))
X_train <- X[train_idx, ]; X_test <- X[-train_idx, ]
y_train <- y[train_idx]; y_test <- y[-train_idx]
p <- length(features)
mtry <- floor(sqrt(p))
ctrl <- trainControl(
method = "repeatedcv", number = 5, repeats = 20,
sampling = "up", classProbs = TRUE,
summaryFunction = twoClassSummary
)
model <- train(
x = X_train, y = y_train,
method = "rf", trControl = ctrl,
preProcess = "medianImpute",
tuneGrid = expand.grid(mtry = mtry),
ntree = 300, metric = "ROC"
)
prob_test <- predict(model, X_test, type = "prob")[, "IR"]
pred_test <- predict(model, X_test)
cm_test <- confusionMatrix(pred_test, y_test)
auc_test <- as.numeric(auc(roc(y_test, prob_test,
levels = c("Non.IR","IR"))))
pr_test <- pr.curve(scores.class0 = prob_test,
weights.class0 = ifelse(y_test == "IR", 1, 0),
curve = FALSE)
internal_holdout_metrics <- data.frame(
Accuracy = cm_test$overall["Accuracy"],
Sensitivity = cm_test$byClass["Sensitivity"],
Specificity = cm_test$byClass["Specificity"],
Balanced_Accuracy = (cm_test$byClass["Sensitivity"] +
cm_test$byClass["Specificity"]) / 2,
Precision = cm_test$byClass["Pos Pred Value"],
F1 = 2 * cm_test$byClass["Pos Pred Value"] *
cm_test$byClass["Sensitivity"] /
(cm_test$byClass["Pos Pred Value"] +
cm_test$byClass["Sensitivity"]),
AUC = auc_test,
PRAUC = pr_test$auc.integral,
AUPRG = compute_auprg(as.integer(y_test == "IR"), prob_test)
)
pfun <- function(object, newdata)
predict(object, newdata, type = "prob")[, "IR"]
shap_values <- fastshap::explain(
object = model, X = as.data.frame(X_test),
pred_wrapper = pfun, nsim = 50, adjust = TRUE
)
list(
model = model,
features = features,
internal_holdout_metrics = internal_holdout_metrics,
shap_validation = shap_values,
X_test_df = as.data.frame(X_test)
)
}
Fits the RF directly (no caret CV) on all internal data for a given
seed. Hyperparameters are fixed a priori so cross-validation is not
required here. Pipeline:
medianImpute → upSample → randomForest.
fit_rf_seed <- function(features, df_train, seed) {
set.seed(seed)
p <- length(features)
mtry <- floor(sqrt(p))
X_tr <- df_train %>%
dplyr::select(all_of(features)) %>%
mutate(across(where(is.numeric), as.double))
y_tr <- factor(make.names(trimws(df_train$IR_label)))
imp <- caret::preProcess(X_tr, method = "medianImpute")
X_tr_imp <- predict(imp, X_tr)
up_data <- caret::upSample(x = X_tr_imp, y = y_tr, yname = ".outcome")
rf <- randomForest::randomForest(
x = up_data[, names(X_tr_imp), drop = FALSE],
y = up_data$.outcome,
ntree = 300, mtry = mtry, nodesize = 1
)
list(rf = rf, imp = imp, features = features)
}
predict_rf <- function(fit_obj, df_new) {
X_new <- df_new %>%
dplyr::select(all_of(fit_obj$features)) %>%
mutate(across(where(is.numeric), as.double))
X_imp <- predict(fit_obj$imp, X_new)
predict(fit_obj$rf, X_imp, type = "prob")[, "IR"]
}
Fits 20 independent RF models (seeds 1–20) on all internal data. Ensemble = mean predicted probability across seeds per patient. AUC-ROC CI: DeLong on ensemble. AUPRC / AUPRG CI: 2,000-iteration bootstrap.
evaluate_multiseed <- function(features, df_train, df_ext,
N_SEEDS = 20, n_boot = 2000) {
y_ext <- factor(make.names(trimws(df_ext$IR_label)))
all_probs <- matrix(NA_real_, nrow = nrow(df_ext), ncol = N_SEEDS)
all_aucs <- numeric(N_SEEDS)
all_auprcs <- numeric(N_SEEDS)
all_auprgs <- numeric(N_SEEDS)
for (s in seq_len(N_SEEDS)) {
fit <- fit_rf_seed(features, df_train, seed = s)
prob <- predict_rf(fit, df_ext)
all_probs[, s] <- prob
roc_s <- roc(y_ext, prob, levels = c("Non.IR","IR"), quiet = TRUE)
all_aucs[s] <- as.numeric(auc(roc_s))
all_auprcs[s] <- pr.curve(scores.class0 = prob,
weights.class0 = ifelse(y_ext == "IR", 1, 0),
curve = FALSE)$auc.integral
all_auprgs[s] <- compute_auprg(as.integer(y_ext == "IR"), prob)
}
ens <- rowMeans(all_probs)
# AUC-ROC: DeLong CI on ensemble
roc_ens <- roc(y_ext, ens, levels = c("Non.IR","IR"), quiet = TRUE)
ens_auc <- as.numeric(auc(roc_ens))
dl <- tryCatch(
as.numeric(ci.auc(roc_ens, method = "delong", conf.level = 0.95)),
error = function(e) c(NA, NA, NA))
# AUPRC / AUPRG: point estimates on ensemble
ens_auprc <- pr.curve(scores.class0 = ens,
weights.class0 = ifelse(y_ext == "IR", 1, 0),
curve = FALSE)$auc.integral
ens_auprg <- compute_auprg(as.integer(y_ext == "IR"), ens)
# Bootstrap CI for AUPRC and AUPRG on ensemble
set.seed(42)
n_ext <- length(y_ext)
prauc_boot <- auprg_boot <- numeric(n_boot)
for (i in seq_len(n_boot)) {
idx <- sample(n_ext, n_ext, replace = TRUE)
yb <- y_ext[idx]; pb <- ens[idx]
if (length(unique(yb)) < 2) { prauc_boot[i] <- NA; auprg_boot[i] <- NA; next }
prauc_boot[i] <- tryCatch(
pr.curve(scores.class0=pb, weights.class0=ifelse(yb=="IR",1,0),
curve=FALSE)$auc.integral, error=function(e) NA_real_)
auprg_boot[i] <- tryCatch(
compute_auprg(as.integer(yb=="IR"), pb), error=function(e) NA_real_)
}
# Classification metrics on ensemble at 0.5 threshold
ens_pred <- factor(ifelse(ens >= 0.5, "IR", "Non.IR"),
levels = c("Non.IR","IR"))
cm_ens <- confusionMatrix(ens_pred, y_ext)
list(
auc_mean = round(mean(all_aucs), 3),
auc_sd = round(sd(all_aucs), 3),
auc_range = range(all_aucs),
auprc_mean = round(mean(all_auprcs),3),
auprc_sd = round(sd(all_auprcs), 3),
auprg_mean = round(mean(all_auprgs),3),
auprg_sd = round(sd(all_auprgs), 3),
all_aucs = all_aucs,
all_auprcs = all_auprcs,
all_auprgs = all_auprgs,
ens_auc = round(ens_auc, 3),
ens_auc_lo = round(dl[1], 3),
ens_auc_hi = round(dl[3], 3),
ens_auprc = round(ens_auprc,3),
ens_auprc_lo= round(quantile(prauc_boot,0.025,na.rm=TRUE),3),
ens_auprc_hi= round(quantile(prauc_boot,0.975,na.rm=TRUE),3),
ens_auprg = round(ens_auprg,3),
ens_auprg_lo= round(quantile(auprg_boot,0.025,na.rm=TRUE),3),
ens_auprg_hi= round(quantile(auprg_boot,0.975,na.rm=TRUE),3),
ensemble_prob = ens,
y_ext = y_ext,
cm = cm_ens,
N_SEEDS = N_SEEDS
)
}
100 stratified 70/30 splits. AUC-ROC, AUPRC, and AUPRG are collected per iteration; CIs are 2.5–97.5 percentiles across iterations.
all_results <- list()
all_shap_val <- list()
set.seed(123)
n_iter <- 100
for (i in seq_len(n_iter)) {
y <- factor(trimws(df_test$IR_label))
levels(y) <- make.names(levels(y))
train_idx <- createDataPartition(y, p = 0.7, list = FALSE)
CGM_model <- train_model_with_shap(feature_sets$CGM_model, df_test, train_idx)
SW_model <- train_model_with_shap(feature_sets$Smartwatch_model, df_test, train_idx)
Base_model <- train_model_with_shap(feature_sets$Baseline_model, df_test, train_idx)
trained_models <- list(CGM_model, SW_model, Base_model)
iter_results <- dplyr::bind_rows(
standardize(CGM_model$internal_holdout_metrics, "Model CGM") |> mutate(Dataset = "Internal"),
standardize(SW_model$internal_holdout_metrics, "Model smartwatch") |> mutate(Dataset = "Internal"),
standardize(Base_model$internal_holdout_metrics, "Model baseline") |> mutate(Dataset = "Internal")
) %>% mutate(iteration = i)
all_results[[i]] <- iter_results
for (m in seq_along(trained_models)) {
mdl <- trained_models[[m]]
all_shap_val[[length(all_shap_val) + 1]] <- list(
iteration = i,
model_name = model_names[m],
shap = mdl$shap_validation,
X_df = mdl$X_test_df
)
}
if (i %% 10 == 0) cat(sprintf(" Iteration %d / %d\n", i, n_iter))
}
## Iteration 10 / 100
## Iteration 20 / 100
## Iteration 30 / 100
## Iteration 40 / 100
## Iteration 50 / 100
## Iteration 60 / 100
## Iteration 70 / 100
## Iteration 80 / 100
## Iteration 90 / 100
## Iteration 100 / 100
bootstrap_results <- dplyr::bind_rows(all_results)
final_summary <- bootstrap_results %>%
group_by(Model, Dataset) %>%
summarise(
across(
c(Accuracy, Sensitivity, Specificity, Balanced_Accuracy,
Precision, F1, AUC, PRAUC, AUPRG),
list(mean = ~mean(.x, na.rm = TRUE),
lower = ~quantile(.x, 0.025, na.rm = TRUE),
upper = ~quantile(.x, 0.975, na.rm = TRUE)),
.names = "{.col}_{.fn}"
),
.groups = "drop"
)
cat("\nInternal performance summary (mean [2.5%, 97.5%]):\n")
##
## Internal performance summary (mean [2.5%, 97.5%]):
print(
final_summary %>%
dplyr::select(Model,
AUC_mean, AUC_lower, AUC_upper,
PRAUC_mean, PRAUC_lower, PRAUC_upper,
AUPRG_mean, AUPRG_lower, AUPRG_upper) %>%
dplyr::rename(
AUC_ROC = AUC_mean, AUC_lo = AUC_lower, AUC_hi = AUC_upper,
AUPRC = PRAUC_mean, AUPRC_lo= PRAUC_lower, AUPRC_hi= PRAUC_upper,
AUPRG_lo = AUPRG_lower,AUPRG_hi= AUPRG_upper
) %>%
mutate(across(where(is.numeric), ~round(.x, 3)))
)
## # A tibble: 3 × 10
## Model AUC_ROC AUC_lo AUC_hi AUPRC AUPRC_lo AUPRC_hi AUPRG_mean AUPRG_lo
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Model CGM 0.833 0.684 0.953 0.725 0.488 0.915 0.742 0.363
## 2 Model basel… 0.817 0.655 0.933 0.644 0.419 0.853 0.68 0.359
## 3 Model smart… 0.812 0.663 0.933 0.637 0.43 0.875 0.677 0.356
## # ℹ 1 more variable: AUPRG_hi <dbl>
write.csv(bootstrap_results, "bootstrap_results.csv", row.names = FALSE)
write.csv(final_summary, "bootstrap_summary.csv", row.names = FALSE)
Each model is refitted 20 times (seeds 1–20) on all 97 internal samples. The ensemble prediction is the mean probability across seeds per patient.
cat("Evaluating external cohort (20 seeds per model)...\n")
## Evaluating external cohort (20 seeds per model)...
ms_CGM <- evaluate_multiseed(feature_sets$CGM_model, df_test, df_external)
ms_SW <- evaluate_multiseed(feature_sets$Smartwatch_model, df_test, df_external)
ms_Base <- evaluate_multiseed(feature_sets$Baseline_model, df_test, df_external)
ms_list <- list(
"CGM model" = ms_CGM,
"Smartwatch model" = ms_SW,
"Baseline model" = ms_Base
)
ms_summary <- dplyr::bind_rows(lapply(names(ms_list), function(nm) {
ms <- ms_list[[nm]]
data.frame(
Model = nm,
AUC_mean = ms$auc_mean, AUC_sd = ms$auc_sd,
Ens_AUC = ms$ens_auc, Ens_AUC_lo = ms$ens_auc_lo,
Ens_AUC_hi = ms$ens_auc_hi,
AUPRC_mean = ms$auprc_mean, AUPRC_sd = ms$auprc_sd,
Ens_AUPRC = ms$ens_auprc, Ens_AUPRC_lo = ms$ens_auprc_lo,
Ens_AUPRC_hi = ms$ens_auprc_hi,
AUPRG_mean = ms$auprg_mean, AUPRG_sd = ms$auprg_sd,
Ens_AUPRG = ms$ens_auprg, Ens_AUPRG_lo = ms$ens_auprg_lo,
Ens_AUPRG_hi = ms$ens_auprg_hi,
stringsAsFactors = FALSE
)
}))
cat("\nExternal validation results:\n")
##
## External validation results:
print(ms_summary %>% mutate(across(where(is.numeric), ~round(.x, 3))))
## Model AUC_mean AUC_sd Ens_AUC Ens_AUC_lo Ens_AUC_hi
## 2.5%...1 CGM model 0.878 0.012 0.890 0.789 0.990
## 2.5%...2 Smartwatch model 0.794 0.011 0.797 0.674 0.920
## 2.5%...3 Baseline model 0.753 0.009 0.763 0.625 0.901
## AUPRC_mean AUPRC_sd Ens_AUPRC Ens_AUPRC_lo Ens_AUPRC_hi AUPRG_mean
## 2.5%...1 0.802 0.032 0.825 0.633 0.954 0.866
## 2.5%...2 0.698 0.019 0.708 0.486 0.858 0.725
## 2.5%...3 0.568 0.035 0.582 0.373 0.826 0.633
## AUPRG_sd Ens_AUPRG Ens_AUPRG_lo Ens_AUPRG_hi
## 2.5%...1 0.033 0.883 0.715 0.983
## 2.5%...2 0.025 0.740 0.364 0.934
## 2.5%...3 0.026 0.679 0.321 0.901
ext_probs <- list(
"CGM model" = ms_CGM$ensemble_prob,
"Smartwatch model" = ms_SW$ensemble_prob,
"Baseline model" = ms_Base$ensemble_prob
)
y_ext_fac <- ms_CGM$y_ext
y_bin <- as.integer(y_ext_fac == "IR")
ir_prev <- mean(y_bin)
compute_ext_full <- function(y_true, prob, model_name, n_boot = 2000, seed = 42) {
n <- length(y_true)
pred <- factor(ifelse(prob >= 0.5, "IR", "Non.IR"), levels = c("Non.IR","IR"))
cm <- confusionMatrix(pred, y_true)
sens <- as.numeric(cm$byClass["Sensitivity"])
spec <- as.numeric(cm$byClass["Specificity"])
prec <- as.numeric(cm$byClass["Pos Pred Value"])
f1 <- ifelse(is.na(prec)|prec+sens==0, NA_real_, 2*prec*sens/(prec+sens))
acc <- as.numeric(cm$overall["Accuracy"])
auprc<- pr.curve(scores.class0=prob, weights.class0=ifelse(y_true=="IR",1,0),
curve=FALSE)$auc.integral
auprg<- compute_auprg(as.integer(y_true=="IR"), prob)
set.seed(seed)
mat <- matrix(NA_real_, nrow=n_boot, ncol=8,
dimnames=list(NULL, c("sens","spec","prec","f1","acc",
"balacc","prauc","auprg")))
for (i in seq_len(n_boot)) {
idx <- sample(n, n, replace=TRUE)
yb <- y_true[idx]; pb_cls <- pred[idx]; pb_pr <- prob[idx]
if (length(unique(yb)) < 2) next
cm_b <- confusionMatrix(pb_cls, yb)
s_b <- as.numeric(cm_b$byClass["Sensitivity"])
sp_b <- as.numeric(cm_b$byClass["Specificity"])
pr_b <- as.numeric(cm_b$byClass["Pos Pred Value"])
f1_b <- ifelse(is.na(pr_b)|pr_b+s_b==0, NA_real_, 2*pr_b*s_b/(pr_b+s_b))
prauc_b <- tryCatch(
pr.curve(scores.class0=pb_pr, weights.class0=ifelse(yb=="IR",1,0),
curve=FALSE)$auc.integral, error=function(e) NA_real_)
auprg_b <- tryCatch(
compute_auprg(as.integer(yb=="IR"), pb_pr), error=function(e) NA_real_)
mat[i,] <- c(s_b, sp_b, pr_b, f1_b, as.numeric(cm_b$overall["Accuracy"]),
(s_b+sp_b)/2, prauc_b, auprg_b)
}
ci <- apply(mat, 2, quantile, probs=c(0.025,0.975), na.rm=TRUE)
data.frame(
Model = model_name,
Sensitivity = round(sens,3), Sensitivity_lo = round(ci["2.5%","sens"],3), Sensitivity_hi = round(ci["97.5%","sens"],3),
Specificity = round(spec,3), Specificity_lo = round(ci["2.5%","spec"],3), Specificity_hi = round(ci["97.5%","spec"],3),
Balanced_Accuracy= round((sens+spec)/2,3),
BalAcc_lo = round(ci["2.5%","balacc"],3), BalAcc_hi = round(ci["97.5%","balacc"],3),
Precision = round(prec,3), Precision_lo = round(ci["2.5%","prec"],3), Precision_hi = round(ci["97.5%","prec"],3),
F1 = round(f1,3), F1_lo = round(ci["2.5%","f1"],3), F1_hi = round(ci["97.5%","f1"],3),
Accuracy = round(acc,3), Accuracy_lo = round(ci["2.5%","acc"],3), Accuracy_hi = round(ci["97.5%","acc"],3),
AUPRC = round(auprc,3),AUPRC_lo = round(ci["2.5%","prauc"],3), AUPRC_hi = round(ci["97.5%","prauc"],3),
AUPRG = round(auprg,3),AUPRG_lo = round(ci["2.5%","auprg"],3), AUPRG_hi = round(ci["97.5%","auprg"],3),
stringsAsFactors = FALSE
)
}
ext_full <- dplyr::bind_rows(lapply(names(ext_probs), function(nm)
compute_ext_full(y_ext_fac, ext_probs[[nm]], nm)
)) %>%
left_join(ms_summary %>%
dplyr::select(Model, AUC_mean, AUC_sd, Ens_AUC,
Ens_AUC_lo, Ens_AUC_hi,
AUPRC_mean, AUPRC_sd, Ens_AUPRC,
Ens_AUPRC_lo, Ens_AUPRC_hi,
AUPRG_mean, AUPRG_sd, Ens_AUPRG,
Ens_AUPRG_lo, Ens_AUPRG_hi),
by = "Model")
cat("Full external metrics:\n")
## Full external metrics:
print(ext_full %>% dplyr::select(Model, AUC_mean, AUC_sd, Ens_AUC_lo, Ens_AUC_hi,
AUPRC_mean, AUPRC_sd, AUPRG_mean, AUPRG_sd,
Sensitivity, Specificity, Balanced_Accuracy, F1))
## Model AUC_mean AUC_sd Ens_AUC_lo Ens_AUC_hi AUPRC_mean AUPRC_sd
## 1 CGM model 0.878 0.012 0.789 0.990 0.802 0.032
## 2 Smartwatch model 0.794 0.011 0.674 0.920 0.698 0.019
## 3 Baseline model 0.753 0.009 0.625 0.901 0.568 0.035
## AUPRG_mean AUPRG_sd Sensitivity Specificity Balanced_Accuracy F1
## 1 0.866 0.033 0.842 0.810 0.826 0.744
## 2 0.725 0.025 0.737 0.714 0.726 0.622
## 3 0.633 0.026 0.737 0.690 0.714 0.609
write.csv(ext_full, "external_results.csv", row.names = FALSE)
seed_df <- dplyr::bind_rows(lapply(names(ms_list), function(nm) {
data.frame(Model = nm,
AUC = ms_list[[nm]]$all_aucs,
AUPRC = ms_list[[nm]]$all_auprcs,
AUPRG = ms_list[[nm]]$all_auprgs)
})) %>% mutate(Model = factor(Model, levels = names(ms_list)))
model_colors <- c("CGM model"="#1f77b4","Smartwatch model"="#2ca02c",
"Baseline model"="#d62728")
model_labels <- c("CGM model"="CGM","Smartwatch model"="Smartwatch",
"Baseline model"="Baseline")
summ_seed <- seed_df %>%
group_by(Model) %>%
summarise(m_auc=mean(AUC), sd_auc=sd(AUC), mx_auc=max(AUC),
m_auprc=mean(AUPRC),sd_auprc=sd(AUPRC),mx_auprc=max(AUPRC),
m_auprg=mean(AUPRG),sd_auprg=sd(AUPRG),mx_auprg=max(AUPRG),
.groups="drop")
make_seed_panel <- function(df, summ, y_var, y_lab, y_ref=NULL, title="") {
m_col <- paste0("m_", tolower(y_var))
sd_col <- paste0("sd_", tolower(y_var))
mx_col <- paste0("mx_", tolower(y_var))
p <- ggplot(df, aes(x=Model, y=.data[[y_var]], colour=Model)) +
geom_jitter(width=0.12, alpha=0.65, size=2, show.legend=FALSE) +
geom_crossbar(data=summ,
aes(x=Model, y=.data[[m_col]],
ymin=.data[[m_col]]-.data[[sd_col]],
ymax=.data[[m_col]]+.data[[sd_col]]),
width=0.35, linewidth=0.8, fill=NA, show.legend=FALSE) +
geom_text(data=summ,
aes(x=Model, y=.data[[mx_col]]+0.02,
label=sprintf("%.3f\n\u00b1%.3f", .data[[m_col]], .data[[sd_col]])),
size=3, vjust=0, show.legend=FALSE) +
scale_colour_manual(values=model_colors) +
scale_x_discrete(labels=model_labels) +
coord_cartesian(ylim=c(0.35,1.08)) +
labs(title=title, x=NULL, y=y_lab) +
theme_bw(base_size=11) +
theme(plot.title=element_text(face="bold",size=11),
panel.grid.minor=element_blank(),
panel.grid.major.x=element_blank())
if (!is.null(y_ref))
p <- p + geom_hline(yintercept=y_ref, linetype="dashed",
colour="grey50", linewidth=0.7)
p
}
p_seed_auc <- make_seed_panel(seed_df, summ_seed, "AUC", "AUC-ROC", 0.5, "A AUC-ROC")
p_seed_auprc <- make_seed_panel(seed_df, summ_seed, "AUPRC", "AUPRC", ir_prev,"B AUPRC") +
annotate("text", x=0.6, y=ir_prev+0.05, label=sprintf("Chance (%.2f)", ir_prev),
size=3, colour="grey50")
p_seed_auprg <- make_seed_panel(seed_df, summ_seed, "AUPRG", "AUPRG", 0, "C AUPRG") +
annotate("text", x=0.6, y=0.05, label="Chance = 0", size=3, colour="grey50")
p_seed_auc | p_seed_auprc | p_seed_auprg
Net benefit computed manually:
NB(t) = TP/n − FP/n × t/(1−t). All models are evaluated on
ensemble probabilities from the 20-seed fit.
n_ext <- length(y_bin)
thresholds <- seq(0.05, 0.50, by = 0.01)
nb_fn <- function(y, prob, t) {
pred <- as.integer(prob >= t)
tp <- sum(pred == 1 & y == 1)
fp <- sum(pred == 1 & y == 0)
tp / n_ext - fp / n_ext * t / (1 - t)
}
dca_df <- do.call(rbind, lapply(thresholds, function(t) {
data.frame(
threshold = t,
model = c("CGM","SW","Baseline","All","None"),
net_benefit = c(
nb_fn(y_bin, ext_probs[["CGM model"]], t),
nb_fn(y_bin, ext_probs[["Smartwatch model"]], t),
nb_fn(y_bin, ext_probs[["Baseline model"]], t),
ir_prev - (1 - ir_prev) * t / (1 - t),
0),
stringsAsFactors = FALSE)
}))
p_dca <- ggplot(dca_df, aes(x=threshold, y=net_benefit,
colour=model, linetype=model)) +
geom_line(linewidth=0.9) +
scale_colour_manual(
values = c("CGM"="#1f77b4","SW"="#2ca02c","Baseline"="#d62728",
"All"="#888888","None"="#bbbbbb"),
labels = c("CGM"="CGM model","SW"="Smartwatch model",
"Baseline"="Baseline","All"="Treat all","None"="Treat none")) +
scale_linetype_manual(
values = c("CGM"="solid","SW"="dashed","Baseline"="dotdash",
"All"="dotted","None"="dotted"),
labels = c("CGM"="CGM model","SW"="Smartwatch model",
"Baseline"="Baseline","All"="Treat all","None"="Treat none")) +
geom_hline(yintercept=0, colour="black", linewidth=0.5) +
geom_vline(xintercept=ir_prev, colour="black",
linetype="dashed", linewidth=0.6, alpha=0.4) +
annotate("text", x=ir_prev+0.01, y=0.38,
label=sprintf("Prevalence\n(%.2f)", ir_prev),
size=3, colour="grey40", hjust=0) +
coord_cartesian(xlim=c(0.05,0.50), ylim=c(-0.06,0.42)) +
labs(x="Threshold probability", y="Net benefit",
colour=NULL, linetype=NULL) +
theme_bw(base_size=12) +
theme(legend.position="right", panel.grid.minor=element_blank())
print(p_dca)
ggsave("dca.pdf", p_dca, width=7.5, height=5, device=cairo_pdf)
## Warning in grSoftVersion(): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
## dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 0x0006): Library not loaded: /opt/X11/lib/libSM.6.dylib
## Referenced from: <7ECC4104-EC6A-38FD-9BEA-BFE0B870925C> /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/modules/R_X11.so
## Reason: tried: '/opt/X11/lib/libSM.6.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/X11/lib/libSM.6.dylib' (no such file), '/opt/X11/lib/libSM.6.dylib' (no such file), '/Library/Frameworks/R.framework/Resources/lib/libSM.6.dylib' (no such file), '/Library/Java/JavaVirtualMachines/jdk-11.0.18+10/Contents/Home/lib/server/libSM.6.dylib' (no such file)
## Warning in cairoVersion(): unable to load shared object '/Library/Frameworks/R.framework/Resources/library/grDevices/libs//cairo.so':
## dlopen(/Library/Frameworks/R.framework/Resources/library/grDevices/libs//cairo.so, 0x0006): Library not loaded: /opt/X11/lib/libXrender.1.dylib
## Referenced from: <263B701F-2238-3478-A7D2-B7513231EF98> /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/grDevices/libs/cairo.so
## Reason: tried: '/opt/X11/lib/libXrender.1.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/X11/lib/libXrender.1.dylib' (no such file), '/opt/X11/lib/libXrender.1.dylib' (no such file), '/Library/Frameworks/R.framework/Resources/lib/libXrender.1.dylib' (no such file), '/Library/Java/JavaVirtualMachines/jdk-11.0.18+10/Contents/Home/lib/server/libXrender.1.dylib' (no such file)
## Warning in device(filename = "dca.pdf", width = 7.5, height = 5, bg = "white"):
## failed to load cairo DLL
cat(sprintf("\nNet benefit at prevalence threshold (%.2f):\n", ir_prev))
##
## Net benefit at prevalence threshold (0.31):
for (m in c("CGM","SW","Baseline","All","None")) {
sub <- dca_df[dca_df$model == m, ]
nb <- sub$net_benefit[which.min(abs(sub$threshold - ir_prev))]
cat(sprintf(" %-10s NB = %.4f\n", m, nb))
}
## CGM NB = 0.1314
## SW NB = 0.1224
## Baseline NB = 0.1003
## All NB = 0.0021
## None NB = 0.0000
write.csv(dca_df, "dca_results.csv", row.names = FALSE)
feature_name_map <- c(
"bmi" = "BMI",
"mean_fasting_glucose" = "Mean Fasting Glucose",
"sd_fasting_glucose" = "SD Fasting Glucose",
"excursions_per_day" = "Glucose Excursions / Day",
"rec_time_50pct_median" = "Recovery Time (50th pct)",
"overall_max_glucose" = "Overall Max Glucose",
"age" = "Age",
"whr" = "WHR",
"stress_overall_daily_mean_stress" = "Mean Stress (HRV-based)",
"sd_hr_day_night_diff" = "HR Day-Night Variability"
)
rename_features <- function(x)
ifelse(x %in% names(feature_name_map), feature_name_map[x], x)
aggregate_shap <- function(shap_list, model_filter) {
entries <- Filter(function(x) x$model_name == model_filter, shap_list)
mat <- do.call(rbind, lapply(entries,
function(e) colMeans(abs(as.data.frame(e$shap)))))
data.frame(feature=colnames(mat), mean_shap=colMeans(mat),
sd_shap=apply(mat,2,sd), row.names=NULL) %>%
arrange(desc(mean_shap)) %>%
mutate(pct_share = mean_shap / sum(mean_shap) * 100,
feature = rename_features(feature))
}
collect_shap_long <- function(shap_list, model_filter) {
entries <- Filter(function(x) x$model_name == model_filter, shap_list)
bind_rows(lapply(entries, function(e) {
sd <- as.data.frame(e$shap) %>%
mutate(obs=row_number(), iteration=e$iteration) %>%
pivot_longer(-c(obs,iteration), names_to="feature", values_to="shap_value") %>%
mutate(feature=rename_features(feature))
fd <- as.data.frame(e$X_df) %>%
mutate(obs=row_number()) %>%
pivot_longer(-obs, names_to="feature", values_to="feature_value") %>%
mutate(feature=rename_features(feature))
left_join(sd, fd, by=c("obs","feature"))
}))
}
rank_stability_df <- function(shap_list, model_filter) {
entries <- Filter(function(x) x$model_name == model_filter, shap_list)
mat <- do.call(rbind, lapply(entries,
function(e) colMeans(abs(as.data.frame(e$shap)))))
rank_mat <- t(apply(mat,1,rank))
data.frame(feature=colnames(rank_mat), mean_rank=colMeans(rank_mat),
iqr_rank=apply(rank_mat,2,IQR), row.names=NULL) %>%
arrange(mean_rank) %>%
mutate(feature=rename_features(feature))
}
panel_beeswarm <- function(shap_list, model_filter,
shared_x_range=NULL, is_external=FALSE) {
agg <- aggregate_shap(shap_list, model_filter)
long <- collect_shap_long(shap_list, model_filter)
lmap <- agg %>%
mutate(label=paste0(feature," (",round(pct_share,1),"%)")) %>%
dplyr::select(feature, label)
long <- long %>%
left_join(lmap, by="feature") %>%
group_by(feature) %>%
mutate(feat_scaled=(feature_value-min(feature_value,na.rm=TRUE)) /
(max(feature_value,na.rm=TRUE)-min(feature_value,na.rm=TRUE)+1e-9)) %>%
ungroup() %>%
mutate(label=factor(label, levels=lmap$label[match(rev(agg$feature),lmap$feature)]))
p <- ggplot(long, aes(x=shap_value, y=label, colour=feat_scaled)) +
geom_jitter(height=0.18, alpha=ifelse(is_external,0.9,0.5), size=1.5, stroke=0) +
scale_colour_gradient(low="#3B82C4", high="#E84545",
name="Feature\nvalue", breaks=c(0,1), labels=c("Low","High")) +
geom_vline(xintercept=0, linetype="dashed", colour="grey50", linewidth=0.4) +
labs(title=model_filter, x="SHAP value", y=NULL) +
theme_bw(base_size=11) +
theme(plot.title=element_text(face="bold",hjust=0.5,size=11),
legend.position="none", panel.grid.minor=element_blank(),
axis.text.y=element_text(size=9))
if (!is.null(shared_x_range)) p <- p + coord_cartesian(xlim=shared_x_range)
p
}
panel_bar <- function(shap_list, model_filter, shared_x_max=NULL) {
agg <- aggregate_shap(shap_list, model_filter) %>%
mutate(label=paste0(feature," (",round(pct_share,1),"%)"),
label=factor(label, levels=rev(paste0(feature," (",round(pct_share,1),"%)"))),
ymin_eb=pmax(mean_shap-sd_shap,0), ymax_eb=mean_shap+sd_shap)
p <- ggplot(agg, aes(x=mean_shap, y=label)) +
geom_col(fill="#4C72B0", alpha=0.85) +
geom_errorbarh(aes(xmin=ymin_eb,xmax=ymax_eb), height=0.3, colour="grey30") +
labs(title=model_filter, x="Mean |SHAP|", y=NULL) +
theme_bw(base_size=11) +
theme(plot.title=element_text(face="bold",hjust=0.5,size=11),
panel.grid.minor=element_blank(), axis.text.y=element_text(size=9))
if (!is.null(shared_x_max)) p <- p + coord_cartesian(xlim=c(0,shared_x_max))
p
}
panel_rank <- function(shap_list, model_filter, shared_x_range=NULL) {
rs <- rank_stability_df(shap_list, model_filter) %>%
mutate(label=factor(feature, levels=rev(feature)))
p <- ggplot(rs, aes(x=mean_rank, y=label)) +
geom_point(size=3, colour="#2ca02c") +
geom_errorbarh(aes(xmin=mean_rank-iqr_rank/2,xmax=mean_rank+iqr_rank/2),
height=0.3, colour="grey40") +
scale_x_reverse() +
labs(title=model_filter, x="Mean rank (lower = more important)", y=NULL) +
theme_bw(base_size=11) +
theme(plot.title=element_text(face="bold",hjust=0.5,size=11),
panel.grid.minor=element_blank(), axis.text.y=element_text(size=9))
if (!is.null(shared_x_range)) p <- p + coord_cartesian(xlim=shared_x_range)
p
}
make_shap_figure <- function(shap_list, dataset_label, model_names_vec,
plot_type=c("beeswarm","bar","rank"),
panel_labels=LETTERS[seq_along(model_names_vec)]) {
plot_type <- match.arg(plot_type)
if (plot_type=="beeswarm") {
all_vals <- unlist(lapply(model_names_vec, function(mn)
unlist(lapply(Filter(function(x) x$model_name==mn, shap_list),
function(e) as.numeric(as.matrix(e$shap))))))
srng <- c(-max(abs(all_vals),na.rm=TRUE)*1.05,
max(abs(all_vals),na.rm=TRUE)*1.05)
}
if (plot_type=="bar") {
am <- unlist(lapply(model_names_vec, function(mn) {
a <- aggregate_shap(shap_list,mn); a$mean_shap+a$sd_shap }))
sxmax <- max(am,na.rm=TRUE)*1.05
}
if (plot_type=="rank") {
nf <- max(unlist(lapply(model_names_vec, function(mn)
nrow(rank_stability_df(shap_list,mn)))))
srng <- c(nf+0.5,0.5)
}
panels <- lapply(seq_along(model_names_vec), function(j) {
mn <- model_names_vec[rev(seq_along(model_names_vec))[j]]
p <- switch(plot_type,
beeswarm=panel_beeswarm(shap_list,mn,shared_x_range=srng),
bar=panel_bar(shap_list,mn,shared_x_max=sxmax),
rank=panel_rank(shap_list,mn,shared_x_range=srng))
p + labs(tag=panel_labels[j]) +
theme(plot.tag=element_text(face="bold",size=13))
})
subtitle <- switch(plot_type,
beeswarm="Each point = one observation x one bootstrap iteration. Labels = mean |SHAP| share (%).",
bar="Error bars = ±1 SD across bootstrap iterations. Labels = mean |SHAP| share (%).",
rank="Error bars = IQR across bootstrap iterations. Leftward = more important.")
if (plot_type=="beeswarm") {
dummy <- ggplot(data.frame(x=0,y=0,v=c(0,1)),aes(x,y,colour=v)) +
geom_point() +
scale_colour_gradient(low="#3B82C4",high="#E84545",name="Feature\nvalue",
breaks=c(0,1),labels=c("Low","High")) +
theme(legend.position="right",legend.title=element_text(size=9),
legend.text=element_text(size=8),legend.key.height=unit(1.2,"cm"))
leg <- cowplot::get_legend(dummy)
combined <- (Reduce(`|`,panels)|patchwork::wrap_elements(leg)) +
plot_layout(widths=c(rep(1,length(panels)),0.15))
} else {
combined <- Reduce(`|`,panels)
}
combined + plot_annotation(
title=paste0(switch(plot_type,beeswarm="SHAP Summary",
bar="Mean |SHAP| Feature Importance",
rank="Feature Rank Stability")," — ",dataset_label),
caption=subtitle,
theme=theme(plot.title=element_text(face="bold",hjust=0.5,size=13),
plot.caption=element_text(size=10,colour="grey40",hjust=0.5)))
}
Aggregated across 100 bootstrap iterations. Each iteration contributes one SHAP matrix from its own model trained on 70% of the internal data.
for (pt in c("beeswarm","bar","rank")) {
fig <- make_shap_figure(all_shap_val, "Internal Validation", model_names, pt)
print(fig)
ggsave(sprintf("shap_%s_internal.pdf", pt), fig,
width=15, height=5.5, device=cairo_pdf)
}
SHAP values are averaged across all 20 seeds: for each seed the model is refitted on all internal data and SHAP is computed for the 61 external patients. The 20 matrices are averaged element-wise before plotting. This ensures the SHAP figure reflects the same ensemble used for reported metrics.
compute_ensemble_shap <- function(features, df_train, df_ext,
N_SEEDS = 20, nsim = 50) {
p <- length(features)
mtry <- floor(sqrt(p))
X_ext_raw <- df_ext %>%
dplyr::select(all_of(features)) %>%
mutate(across(where(is.numeric), as.double))
shap_matrices <- vector("list", N_SEEDS)
X_ext_imp_ref <- NULL
for (s in seq_len(N_SEEDS)) {
set.seed(s)
X_tr <- df_train %>%
dplyr::select(all_of(features)) %>%
mutate(across(where(is.numeric), as.double))
y_tr <- factor(make.names(trimws(df_train$IR_label)))
imp <- caret::preProcess(X_tr, method = "medianImpute")
X_tr_imp <- predict(imp, X_tr)
X_ext_imp <- predict(imp, X_ext_raw)
up_data <- caret::upSample(x=X_tr_imp, y=y_tr, yname=".outcome")
rf <- randomForest::randomForest(
x=up_data[,names(X_tr_imp),drop=FALSE], y=up_data$.outcome,
ntree=300, mtry=mtry, nodesize=1)
shap_s <- fastshap::explain(
object = rf,
X = as.data.frame(X_ext_imp),
pred_wrapper = function(object, newdata)
predict(object, newdata, type="prob")[,"IR"],
nsim = nsim,
adjust = TRUE
)
shap_matrices[[s]] <- as.matrix(shap_s)
if (s == 1) X_ext_imp_ref <- as.data.frame(X_ext_imp)
cat(sprintf(" seed %2d / %d\n", s, N_SEEDS))
}
shap_avg <- as.data.frame(Reduce("+", shap_matrices) / N_SEEDS)
list(shap = shap_avg, X_df = X_ext_imp_ref)
}
cat("Computing ensemble SHAP for external cohort (20 seeds x 3 models)...\n")
## Computing ensemble SHAP for external cohort (20 seeds x 3 models)...
cat(" CGM model:\n")
## CGM model:
es_CGM <- compute_ensemble_shap(feature_sets$CGM_model, df_test, df_external)
## seed 1 / 20
## seed 2 / 20
## seed 3 / 20
## seed 4 / 20
## seed 5 / 20
## seed 6 / 20
## seed 7 / 20
## seed 8 / 20
## seed 9 / 20
## seed 10 / 20
## seed 11 / 20
## seed 12 / 20
## seed 13 / 20
## seed 14 / 20
## seed 15 / 20
## seed 16 / 20
## seed 17 / 20
## seed 18 / 20
## seed 19 / 20
## seed 20 / 20
cat(" Smartwatch model:\n")
## Smartwatch model:
es_SW <- compute_ensemble_shap(feature_sets$Smartwatch_model, df_test, df_external)
## seed 1 / 20
## seed 2 / 20
## seed 3 / 20
## seed 4 / 20
## seed 5 / 20
## seed 6 / 20
## seed 7 / 20
## seed 8 / 20
## seed 9 / 20
## seed 10 / 20
## seed 11 / 20
## seed 12 / 20
## seed 13 / 20
## seed 14 / 20
## seed 15 / 20
## seed 16 / 20
## seed 17 / 20
## seed 18 / 20
## seed 19 / 20
## seed 20 / 20
cat(" Baseline model:\n")
## Baseline model:
es_Base <- compute_ensemble_shap(feature_sets$Baseline_model, df_test, df_external)
## seed 1 / 20
## seed 2 / 20
## seed 3 / 20
## seed 4 / 20
## seed 5 / 20
## seed 6 / 20
## seed 7 / 20
## seed 8 / 20
## seed 9 / 20
## seed 10 / 20
## seed 11 / 20
## seed 12 / 20
## seed 13 / 20
## seed 14 / 20
## seed 15 / 20
## seed 16 / 20
## seed 17 / 20
## seed 18 / 20
## seed 19 / 20
## seed 20 / 20
all_shap_ext_final <- list(
list(iteration=1, model_name="Model CGM", shap=es_CGM$shap, X_df=es_CGM$X_df),
list(iteration=1, model_name="Model smartwatch", shap=es_SW$shap, X_df=es_SW$X_df),
list(iteration=1, model_name="Model baseline", shap=es_Base$shap, X_df=es_Base$X_df)
)
make_shap_ext_figure <- function(shap_list, model_names_vec,
panel_labels=LETTERS[seq_along(model_names_vec)]) {
all_vals <- unlist(lapply(model_names_vec, function(mn)
unlist(lapply(Filter(function(x) x$model_name==mn, shap_list),
function(e) as.numeric(as.matrix(e$shap))))))
xmax <- max(abs(all_vals),na.rm=TRUE)*1.05; srng <- c(-xmax,xmax)
panels <- lapply(seq_along(model_names_vec), function(j) {
panel_beeswarm(shap_list,model_names_vec[j],
shared_x_range=srng, is_external=TRUE) +
labs(tag=panel_labels[j]) +
theme(plot.tag=element_text(face="bold",size=13))
})
dummy <- ggplot(data.frame(x=0,y=0,v=c(0,1)),aes(x,y,colour=v)) +
geom_point() +
scale_colour_gradient(low="#3B82C4",high="#E84545",name="Feature\nvalue",
breaks=c(0,1),labels=c("Low","High")) +
theme(legend.position="right",legend.title=element_text(size=9),
legend.text=element_text(size=8),legend.key.height=unit(1.2,"cm"))
leg <- cowplot::get_legend(dummy)
(Reduce(`|`,panels)|patchwork::wrap_elements(leg)) +
plot_layout(widths=c(rep(1,length(panels)),0.15)) +
plot_annotation(
title = "SHAP Summary — External Test Set",
caption = paste0("Each point = one external patient. ",
"SHAP values averaged across 20 independently fitted models. ",
"Labels = mean |SHAP| share (%)."),
theme = theme(
plot.title = element_text(face="bold",hjust=0.5,size=13),
plot.caption= element_text(size=10,colour="grey40",hjust=0.5)))
}
fig_ext <- make_shap_ext_figure(all_shap_ext_final, model_names)
print(fig_ext)
ggsave("shap_beeswarm_external.pdf", fig_ext, width=15, height=5.5, device=cairo_pdf)
(p_dca + labs(title="A Decision curve analysis")) |
(p_seed_auc + labs(title="B AUC-ROC seed stability")) |
(p_seed_auprc + labs(title="C AUPRC seed stability")) |
(p_seed_auprg + labs(title="D AUPRG seed stability"))
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.5
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cowplot_1.2.0 patchwork_1.3.2 tidyr_1.3.2
## [4] prg_0.5.1 PRROC_1.4 rlang_1.2.0
## [7] pROC_1.19.0.1 caret_7.0-1 lattice_0.22-7
## [10] ggplot2_4.0.1 dplyr_1.2.0 fastshap_0.1.1
## [13] randomForest_4.7-1.2
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 timeDate_4052.112 farver_2.1.2
## [4] S7_0.2.1 fastmap_1.2.0 digest_0.6.39
## [7] rpart_4.1.24 timechange_0.3.0 lifecycle_1.0.5
## [10] survival_3.8-3 magrittr_2.0.4 compiler_4.5.2
## [13] sass_0.4.10 tools_4.5.2 utf8_1.2.6
## [16] yaml_2.3.12 data.table_1.18.0 knitr_1.51
## [19] labeling_0.4.3 plyr_1.8.9 RColorBrewer_1.1-3
## [22] withr_3.0.2 purrr_1.2.0 nnet_7.3-20
## [25] grid_4.5.2 stats4_4.5.2 e1071_1.7-17
## [28] future_1.69.0 globals_0.19.1 scales_1.4.0
## [31] iterators_1.0.14 MASS_7.3-65 cli_3.6.6
## [34] rmarkdown_2.30 generics_0.1.4 otel_0.2.0
## [37] rstudioapi_0.17.1 future.apply_1.20.2 reshape2_1.4.5
## [40] cachem_1.1.0 proxy_0.4-29 stringr_1.6.0
## [43] splines_4.5.2 parallel_4.5.2 vctrs_0.7.2
## [46] hardhat_1.4.2 Matrix_1.7-4 jsonlite_2.0.0
## [49] listenv_0.10.1 foreach_1.5.2 gower_1.0.2
## [52] jquerylib_0.1.4 recipes_1.3.1 glue_1.8.0
## [55] parallelly_1.46.1 codetools_0.2-20 lubridate_1.9.4
## [58] stringi_1.8.7 gtable_0.3.6 tibble_3.3.0
## [61] pillar_1.11.1 htmltools_0.5.9 ipred_0.9-15
## [64] lava_1.8.2 R6_2.6.1 evaluate_1.0.5
## [67] bslib_0.9.0 class_7.3-23 Rcpp_1.1.1
## [70] nlme_3.1-168 prodlim_2026.03.11 xfun_0.55
## [73] pkgconfig_2.0.3 ModelMetrics_1.2.2.2