library(randomForest)
library(fastshap)
library(dplyr)
library(caret)
library(pROC)
library(PRROC)
library(prg)
library(tidyr)
library(patchwork)
library(cowplot)
library(ggplot2)
knitr::opts_chunk$set(message = FALSE, warning = FALSE, fig.align = "center", out.width = "100%")Two AI-READI cohorts: Study 1 (internal, n=97) and Study 2 (external, n=61).
df_test <- read.csv("~/Downloads/read-ai/df_test_f_v2.csv")
df_external <- read.csv("~/Downloads/read-ai/df_external_f_v2.csv")
cat(sprintf("Study 1: n=%d, IR=%d (%.1f%%)\n",
nrow(df_test), sum(df_test$IR_label=="IR"),
mean(df_test$IR_label=="IR")*100))## Study 1: n=97, IR=32 (33.0%)
cat(sprintf("Study 2: n=%d, IR=%d (%.1f%%)\n",
nrow(df_external), sum(df_external$IR_label=="IR"),
mean(df_external$IR_label=="IR")*100))## Study 2: n=61, IR=19 (31.1%)
Three modality-specific feature sets with fixed
mtry = floor(sqrt(p)).
feature_sets <- list(
CGM_model = c("bmi", "mean_fasting_glucose", "sd_fasting_glucose",
"excursions_per_day", "rec_time_50pct_median",
"overall_max_glucose", "age", "whr"),
Smartwatch_model = c("stress_overall_daily_mean_stress", "sd_hr_day_night_diff",
"age", "bmi", "whr"),
Baseline_model = c("bmi", "age", "whr")
)
for (nm in names(feature_sets)) {
p <- length(feature_sets[[nm]])
cat(sprintf("%-20s: p=%d, mtry=%d\n", nm, p, floor(sqrt(p))))
}## CGM_model : p=8, mtry=2
## Smartwatch_model : p=5, mtry=2
## Baseline_model : p=3, mtry=1
Pipeline: median imputation → upsampling → random forest (200 trees).
fit_rf_seed <- function(features, df_train, seed) {
set.seed(seed)
p <- length(features)
mtry <- floor(sqrt(p))
X_tr <- df_train %>%
dplyr::select(all_of(features)) %>%
mutate(across(where(is.numeric), as.double))
y_tr <- factor(make.names(trimws(df_train$IR_label)))
imp <- caret::preProcess(X_tr, method = "medianImpute")
X_tr_imp <- predict(imp, X_tr)
up_data <- caret::upSample(x = X_tr_imp, y = y_tr, yname = ".outcome")
rf <- randomForest::randomForest(
x = up_data[, names(X_tr_imp), drop = FALSE],
y = up_data$.outcome,
ntree = 200, mtry = mtry, nodesize = 1
)
list(rf = rf, imp = imp, features = features)
}
predict_rf <- function(fit_obj, df_new) {
X_new <- df_new %>%
dplyr::select(all_of(fit_obj$features)) %>%
mutate(across(where(is.numeric), as.double))
X_imp <- predict(fit_obj$imp, X_new)
predict(fit_obj$rf, X_imp, type = "prob")[, "IR"]
}
compute_auprg <- function(labels, scores) {
tryCatch(prg::calc_auprg(prg::create_prg_curve(labels, scores)),
error = function(e) NA_real_)
}Five stratified 70/30 splits on Study 1 for this debug/review copy.
set.seed(123)
# Original full-run code:
# n_iter <- 200
# added: keep this review copy fast while preserving the same workflow.
n_iter <- 5
bootstrap_seeds <- sample.int(99999L, n_iter)
all_results <- list()
all_shap_val <- list()
for (i in seq_len(n_iter)) {
y <- factor(trimws(df_test$IR_label))
levels(y) <- make.names(levels(y))
train_idx <- createDataPartition(y, p = 0.7, list = FALSE)
test_set <- df_test[-train_idx, ]
train_set <- df_test[ train_idx, ]
y_test <- factor(make.names(trimws(test_set$IR_label)))
iter_seed <- i * 7L
CGM_fit <- fit_rf_seed(feature_sets$CGM_model, train_set, iter_seed)
SW_fit <- fit_rf_seed(feature_sets$Smartwatch_model, train_set, iter_seed)
Base_fit <- fit_rf_seed(feature_sets$Baseline_model, train_set, iter_seed)
fits <- list(CGM_fit, SW_fit, Base_fit)
model_names <- c("CGM model", "Smartwatch model", "Baseline model")
iter_rows <- lapply(seq_along(fits), function(m) {
fit <- fits[[m]]
prob <- predict_rf(fit, test_set)
pred <- factor(ifelse(prob >= 0.5, "IR", "Non.IR"), levels = c("Non.IR","IR"))
if (length(unique(as.integer(y_test))) < 2) return(NULL)
cm <- confusionMatrix(pred, y_test)
sens <- as.numeric(cm$byClass["Sensitivity"])
spec <- as.numeric(cm$byClass["Specificity"])
prec <- as.numeric(cm$byClass["Pos Pred Value"])
data.frame(
Model = model_names[m],
Accuracy = as.numeric(cm$overall["Accuracy"]),
Sensitivity = sens,
Specificity = spec,
Precision = prec,
F1 = ifelse(is.na(prec)|prec+sens==0, NA_real_, 2*prec*sens/(prec+sens)),
AUC = as.numeric(auc(roc(y_test, prob, levels=c("Non.IR","IR"), quiet=TRUE))),
PRAUC = pr.curve(scores.class0=prob, weights.class0=ifelse(y_test=="IR",1,0),
curve=FALSE)$auc.integral,
AUPRG = compute_auprg(as.integer(y_test=="IR"), prob),
iteration = i
)
})
all_results[[i]] <- dplyr::bind_rows(iter_rows)
# SHAP values
for (m in seq_along(fits)) {
fit <- fits[[m]]
X_test_raw <- test_set %>%
dplyr::select(all_of(fit$features)) %>%
mutate(across(where(is.numeric), as.double))
X_test_imp <- predict(fit$imp, X_test_raw)
shap_s <- tryCatch(
fastshap::explain(
object = fit$rf,
X = as.data.frame(X_test_imp),
pred_wrapper = function(object, newdata)
predict(object, newdata, type="prob")[,"IR"],
nsim = 50, adjust = TRUE
),
error = function(e) NULL
)
if (!is.null(shap_s))
all_shap_val[[length(all_shap_val)+1]] <- list(
iteration = i,
model_name = model_names[m],
shap = shap_s,
X_df = as.data.frame(X_test_imp)
)
}
}
bootstrap_results <- dplyr::bind_rows(all_results)final_summary <- bootstrap_results %>%
group_by(Model) %>%
summarise(
across(c(AUC, PRAUC, AUPRG),
list(mean = ~mean(.x, na.rm=TRUE),
lo = ~quantile(.x, 0.025, na.rm=TRUE),
hi = ~quantile(.x, 0.975, na.rm=TRUE)),
.names = "{.col}_{.fn}"),
.groups = "drop"
)
final_summary %>%
select(Model, starts_with("AUC"), starts_with("PRAUC"), starts_with("AUPRG")) %>%
mutate(across(where(is.numeric), ~round(.x, 3)))Five models trained on entire Study 1, evaluated on Study 2 bootstrap resamples.
evaluate_external_bootstrap <- function(features, df_train, df_ext, seeds_vec,
model_label = NULL, nsim_shap = 50) {
y_ext_fac <- factor(make.names(trimws(df_ext$IR_label)))
y_ext_bin <- as.integer(y_ext_fac == "IR")
n_ext <- length(y_ext_bin)
pos_idx <- which(y_ext_bin == 1)
neg_idx <- which(y_ext_bin == 0)
n_iter <- length(seeds_vec)
iter_aucs <- numeric(n_iter)
iter_auprcs <- numeric(n_iter)
iter_auprgs <- numeric(n_iter)
all_probs_full <- matrix(NA_real_, nrow = n_ext, ncol = n_iter)
shap_matrices <- vector("list", n_iter)
X_ext_ref <- NULL
for (i in seq_len(n_iter)) {
seed <- seeds_vec[i]
fit <- fit_rf_seed(features, df_train, seed = seed)
X_ext_raw <- df_ext %>%
dplyr::select(all_of(features)) %>%
mutate(across(where(is.numeric), as.double))
X_ext_imp <- predict(fit$imp, X_ext_raw)
if (i == 1) X_ext_ref <- as.data.frame(X_ext_imp)
prob_full <- predict(fit$rf, X_ext_imp, type="prob")[,"IR"]
all_probs_full[,i] <- prob_full
set.seed(seed)
boot_idx <- c(sample(pos_idx, length(pos_idx), replace=TRUE),
sample(neg_idx, length(neg_idx), replace=TRUE))
y_boot <- y_ext_bin[boot_idx]
prob_boot <- prob_full[boot_idx]
if (length(unique(y_boot)) < 2) {
iter_aucs[i] <- iter_auprcs[i] <- iter_auprgs[i] <- NA_real_
} else {
iter_aucs[i] <- as.numeric(
auc(roc(y_boot, prob_boot, levels=c(0,1), direction="<", quiet=TRUE))
)
iter_auprcs[i] <- tryCatch(
pr.curve(scores.class0=prob_boot, weights.class0=y_boot,
curve=FALSE)$auc.integral,
error = function(e) NA_real_
)
iter_auprgs[i] <- compute_auprg(y_boot, prob_boot)
}
shap_matrices[[i]] <- tryCatch(
as.matrix(fastshap::explain(
object = fit$rf,
X = as.data.frame(X_ext_imp),
pred_wrapper = function(object, newdata)
predict(object, newdata, type="prob")[,"IR"],
nsim = nsim_shap, adjust = TRUE
)),
error = function(e) NULL
)
}
ens <- rowMeans(all_probs_full, na.rm=TRUE)
ens_pred <- factor(ifelse(ens >= 0.5, "IR", "Non.IR"), levels=c("Non.IR","IR"))
ens_auc <- as.numeric(
auc(roc(y_ext_fac, ens, levels=c("Non.IR","IR"), quiet=TRUE))
)
ens_auprc <- pr.curve(scores.class0=ens, weights.class0=y_ext_bin,
curve=FALSE)$auc.integral
ens_auprg <- compute_auprg(y_ext_bin, ens)
valid_shap <- Filter(Negate(is.null), shap_matrices)
ensemble_shap <- if (length(valid_shap) > 0)
as.data.frame(Reduce("+", valid_shap) / length(valid_shap))
else NULL
shap_list_per_iter <- lapply(seq_len(n_iter), function(i) {
if (is.null(shap_matrices[[i]])) return(NULL)
list(iteration = i, model_name = model_label,
shap = as.data.frame(shap_matrices[[i]]), X_df = X_ext_ref)
})
shap_list_per_iter <- Filter(Negate(is.null), shap_list_per_iter)
list(
auc_mean = round(mean(iter_aucs, na.rm=TRUE), 3),
auc_lo = round(quantile(iter_aucs, 0.025, na.rm=TRUE), 3),
auc_hi = round(quantile(iter_aucs, 0.975, na.rm=TRUE), 3),
auprc_mean = round(mean(iter_auprcs, na.rm=TRUE), 3),
auprc_lo = round(quantile(iter_auprcs, 0.025, na.rm=TRUE), 3),
auprc_hi = round(quantile(iter_auprcs, 0.975, na.rm=TRUE), 3),
auprg_mean = round(mean(iter_auprgs, na.rm=TRUE), 3),
auprg_lo = round(quantile(iter_auprgs, 0.025, na.rm=TRUE), 3),
auprg_hi = round(quantile(iter_auprgs, 0.975, na.rm=TRUE), 3),
iter_aucs = iter_aucs,
iter_auprcs = iter_auprcs,
iter_auprgs = iter_auprgs,
all_probs_matrix = all_probs_full,
ens_auc = round(ens_auc, 3),
ens_auprc = round(ens_auprc, 3),
ens_auprg = round(ens_auprg, 3),
ensemble_prob = ens,
y_ext = y_ext_fac,
cm = confusionMatrix(ens_pred, y_ext_fac),
ensemble_shap = ensemble_shap,
shap_list_per_iter = shap_list_per_iter,
X_ext_ref = X_ext_ref,
model_label = model_label
)
}
ext_CGM <- evaluate_external_bootstrap(
feature_sets$CGM_model, df_test, df_external, bootstrap_seeds, "CGM model")
ext_SW <- evaluate_external_bootstrap(
feature_sets$Smartwatch_model, df_test, df_external, bootstrap_seeds, "Smartwatch model")
ext_Base <- evaluate_external_bootstrap(
feature_sets$Baseline_model, df_test, df_external, bootstrap_seeds, "Baseline model")
ext_list <- list("CGM model" = ext_CGM, "Smartwatch model" = ext_SW,
"Baseline model" = ext_Base)ext_summary <- dplyr::bind_rows(lapply(names(ext_list), function(nm) {
r <- ext_list[[nm]]
data.frame(
Model = nm,
AUC_mean = r$auc_mean, AUC_lo = r$auc_lo, AUC_hi = r$auc_hi,
AUPRC_mean = r$auprc_mean, AUPRC_lo = r$auprc_lo, AUPRC_hi = r$auprc_hi,
AUPRG_mean = r$auprg_mean, AUPRG_lo = r$auprg_lo, AUPRG_hi = r$auprg_hi
)
}))
ext_summaryfeature_name_map <- c(
"bmi" = "BMI",
"mean_fasting_glucose" = "Mean Fasting Glucose",
"sd_fasting_glucose" = "SD Fasting Glucose",
"excursions_per_day" = "Glucose Excursions / Day",
"rec_time_50pct_median" = "Recovery Time (50th pct)",
"overall_max_glucose" = "Overall Max Glucose",
"age" = "Age",
"whr" = "WHR",
"stress_overall_daily_mean_stress" = "Mean Stress (HRV)",
"sd_hr_day_night_diff" = "HR Day-Night Variability"
)
rename_features <- function(x)
ifelse(x %in% names(feature_name_map), feature_name_map[x], x)
aggregate_shap <- function(shap_list, model_filter) {
entries <- Filter(function(x) x$model_name == model_filter, shap_list)
mat <- do.call(rbind, lapply(entries,
function(e) colMeans(abs(as.data.frame(e$shap)))))
data.frame(feature = colnames(mat), mean_shap = colMeans(mat),
sd_shap = apply(mat, 2, sd), row.names = NULL) %>%
arrange(desc(mean_shap)) %>%
mutate(pct_share = mean_shap / sum(mean_shap) * 100,
feature = rename_features(feature))
}
collect_shap_long <- function(shap_list, model_filter) {
entries <- Filter(function(x) x$model_name == model_filter, shap_list)
bind_rows(lapply(entries, function(e) {
sd <- as.data.frame(e$shap) %>%
mutate(obs = row_number(), iteration = e$iteration) %>%
pivot_longer(-c(obs, iteration), names_to = "feature", values_to = "shap_value") %>%
mutate(feature = rename_features(feature))
fd <- as.data.frame(e$X_df) %>%
mutate(obs = row_number()) %>%
pivot_longer(-obs, names_to = "feature", values_to = "feature_value") %>%
mutate(feature = rename_features(feature))
left_join(sd, fd, by = c("obs", "feature"))
}))
}
panel_beeswarm <- function(shap_list, model_filter, shared_x_range = NULL,
is_external = FALSE, base_size = 14) {
agg <- aggregate_shap(shap_list, model_filter)
long <- collect_shap_long(shap_list, model_filter)
lmap <- agg %>%
mutate(label = paste0(feature, " (", round(pct_share, 1), "%)")) %>%
dplyr::select(feature, label)
long <- long %>%
left_join(lmap, by = "feature") %>%
group_by(feature) %>%
mutate(feat_scaled = (feature_value - min(feature_value, na.rm=TRUE)) /
(max(feature_value, na.rm=TRUE) - min(feature_value, na.rm=TRUE) + 1e-9)) %>%
ungroup() %>%
mutate(label = factor(label,
levels = lmap$label[match(rev(agg$feature), lmap$feature)]))
title_clean <- gsub("^Model ", "", model_filter)
title_clean <- paste0(toupper(substring(title_clean, 1, 1)),
substring(title_clean, 2))
p <- ggplot(long, aes(x = shap_value, y = label, colour = feat_scaled)) +
geom_jitter(height = 0.20,
alpha = ifelse(is_external, 0.80, 0.35),
size = ifelse(is_external, 0.9, 0.6), stroke = 0) +
scale_colour_gradient(low = "#3B82C4", high = "#E84545",
name = "Feature\nvalue", breaks = c(0,1),
labels = c("Low","High")) +
geom_vline(xintercept = 0, linetype = "dashed",
colour = "grey60", linewidth = 0.4) +
labs(title = title_clean, x = "SHAP value", y = NULL) +
scale_y_discrete(expand = expansion(add = 0.5)) +
theme_bw(base_size = base_size) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5, size = base_size),
axis.text.y = element_text(size = base_size-2, face = "bold"),
axis.text.x = element_text(size = base_size-4),
axis.title.x = element_text(size = base_size-3),
legend.position = "none",
panel.grid.minor = element_blank()
)
if (!is.null(shared_x_range))
p <- p + coord_cartesian(xlim = shared_x_range)
p
}model_names <- c("CGM model", "Smartwatch model", "Baseline model")
all_vals <- unlist(lapply(model_names, function(mn)
unlist(lapply(Filter(function(x) x$model_name == mn, all_shap_val),
function(e) as.numeric(as.matrix(e$shap))))))
srng <- c(-max(abs(all_vals), na.rm=TRUE)*1.05, max(abs(all_vals), na.rm=TRUE)*1.05)
panels <- lapply(seq_along(model_names), function(j) {
panel_beeswarm(all_shap_val, model_names[j], shared_x_range = srng) +
labs(tag = LETTERS[j]) +
theme(plot.tag = element_text(face = "bold", size = 15))
})
dummy <- ggplot(data.frame(x=0, y=0, v=c(0,1)), aes(x, y, colour=v)) +
geom_point() +
scale_colour_gradient(low = "#3B82C4", high = "#E84545",
name = "Feature\nvalue", breaks = c(0,1),
labels = c("Low","High")) +
theme(legend.position = "right",
legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
legend.key.height = unit(1.2, "cm"))
leg <- cowplot::get_legend(dummy)
(Reduce(`|`, panels) | patchwork::wrap_elements(leg)) +
plot_layout(widths = c(rep(1, length(panels)), 0.15)) +
plot_annotation(
title = "SHAP Beeswarm — Internal Validation",
theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 14)))all_shap_ext <- list(
list(iteration = 1, model_name = "CGM model",
shap = ext_CGM$ensemble_shap, X_df = ext_CGM$X_ext_ref),
list(iteration = 1, model_name = "Smartwatch model",
shap = ext_SW$ensemble_shap, X_df = ext_SW$X_ext_ref),
list(iteration = 1, model_name = "Baseline model",
shap = ext_Base$ensemble_shap, X_df = ext_Base$X_ext_ref)
)
all_vals_ext <- unlist(lapply(model_names, function(mn)
unlist(lapply(Filter(function(x) x$model_name == mn, all_shap_ext),
function(e) as.numeric(as.matrix(e$shap))))))
srng_ext <- c(-max(abs(all_vals_ext), na.rm=TRUE)*1.05,
max(abs(all_vals_ext), na.rm=TRUE)*1.05)
panels_ext <- lapply(seq_along(model_names), function(j) {
panel_beeswarm(all_shap_ext, model_names[j], shared_x_range = srng_ext,
is_external = TRUE) +
labs(tag = LETTERS[j]) +
theme(plot.tag = element_text(face = "bold", size = 15))
})
(Reduce(`|`, panels_ext) | patchwork::wrap_elements(leg)) +
plot_layout(widths = c(rep(1, length(panels_ext)), 0.15)) +
plot_annotation(
title = "SHAP Beeswarm — External Validation",
theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 14)))# ==============================================================================
# PANEL A: ROC CURVE WITH MONOTONIC 95% CI
# ==============================================================================
y_ext_fac <- ext_CGM$y_ext
y_ext_bin <- as.integer(y_ext_fac == "IR")
ir_prev <- mean(y_ext_bin)
# Function to compute MONOTONIC ROC curve with 95% CI from bootstrap
roc_band_monotonic <- function(prob_matrix, y_fac, n_boot = 200) {
fpr_grid <- seq(0, 1, by = 0.01)
n_grid <- length(fpr_grid)
tpr_mat <- matrix(NA_real_, nrow = n_grid, ncol = n_boot)
for (i in seq_len(n_boot)) {
prob_i <- prob_matrix[, i]
if (all(is.na(prob_i))) next
r <- tryCatch(
pROC::roc(y_fac, prob_i, levels = c("Non.IR", "IR"),
direction = "<", quiet = TRUE),
error = function(e) NULL
)
if (is.null(r)) next
fpr_i <- 1 - r$specificities
tpr_i <- r$sensitivities
# Sort by FPR (required for proper interpolation)
ord <- order(fpr_i)
fpr_i <- fpr_i[ord]
tpr_i <- tpr_i[ord]
# Interpolate onto common grid
tpr_interp <- approx(fpr_i, tpr_i, xout = fpr_grid,
method = "linear", rule = 2)$y
# ENFORCE MONOTONICITY
tpr_interp <- cummax(tpr_interp)
tpr_mat[, i] <- tpr_interp
}
tpr_mean <- rowMeans(tpr_mat, na.rm = TRUE)
tpr_lo <- apply(tpr_mat, 1, quantile, probs = 0.025, na.rm = TRUE)
tpr_hi <- apply(tpr_mat, 1, quantile, probs = 0.975, na.rm = TRUE)
# Final enforcement on mean
tpr_mean <- cummax(tpr_mean)
tpr_lo <- cummax(tpr_lo)
tpr_hi <- cummax(tpr_hi)
data.frame(fpr = fpr_grid, tpr = tpr_mean, lo = tpr_lo, hi = tpr_hi)
}
# Original full-run code:
# band_cgm <- roc_band_monotonic(ext_CGM$all_probs_matrix, y_ext_fac, 200)
# band_sw <- roc_band_monotonic(ext_SW$all_probs_matrix, y_ext_fac, 200)
# band_base <- roc_band_monotonic(ext_Base$all_probs_matrix, y_ext_fac, 200)
# added: use the active iteration count so this 5-iteration copy stays consistent.
band_cgm <- roc_band_monotonic(ext_CGM$all_probs_matrix, y_ext_fac, n_iter)
band_sw <- roc_band_monotonic(ext_SW$all_probs_matrix, y_ext_fac, n_iter)
band_base <- roc_band_monotonic(ext_Base$all_probs_matrix, y_ext_fac, n_iter)
band_cgm$model <- "CGM model"
band_sw$model <- "Smartwatch model"
band_base$model <- "Baseline model"
band_df <- rbind(band_cgm, band_sw, band_base) %>%
mutate(model = factor(model, levels = c("Baseline model", "Smartwatch model", "CGM model")))
# Original code: plotted ROC from ensemble-averaged probabilities. This did
# not match the shaded CI, which is computed from per-seed ROC curves.
# make_roc_step <- function(prob, y_fac, model_name) {
# r <- pROC::roc(y_fac, prob, levels = c("Non.IR", "IR"), direction = "<", quiet = TRUE)
# data.frame(fpr = 1 - r$specificities, tpr = r$sensitivities, model = model_name)
# }
#
# ext_probs <- list(
# CGM = ext_CGM$ensemble_prob,
# Smartwatch = ext_SW$ensemble_prob,
# Baseline = ext_Base$ensemble_prob
# )
#
# step_df <- rbind(
# make_roc_step(ext_probs$CGM, y_ext_fac, "CGM model"),
# make_roc_step(ext_probs$Smartwatch, y_ext_fac, "Smartwatch model"),
# make_roc_step(ext_probs$Baseline, y_ext_fac, "Baseline model")
# ) %>% mutate(model = factor(model, levels = c("Baseline model", "Smartwatch model", "CGM model")))
#
# added: plot the pointwise mean ROC from the same per-seed curves that define
# the shaded confidence bands, and sort explicitly before geom_step().
step_df <- band_df %>%
dplyr::select(fpr, tpr, model) %>%
dplyr::arrange(model, fpr, tpr)
# AUC stats for legend
compute_auc_stats <- function(prob_matrix, y_bin) {
aucs <- apply(prob_matrix, 2, function(p) {
if (all(is.na(p))) return(NA_real_)
tryCatch({
roc_obj <- pROC::roc(y_bin, p, levels = c(0, 1), direction = "<", quiet = TRUE)
as.numeric(pROC::auc(roc_obj))
}, error = function(e) NA_real_)
})
list(mean = mean(aucs, na.rm=TRUE), sd = sd(aucs, na.rm=TRUE),
lo = quantile(aucs, 0.025, na.rm=TRUE), hi = quantile(aucs, 0.975, na.rm=TRUE))
}
auc_cgm <- compute_auc_stats(ext_CGM$all_probs_matrix, y_ext_bin)
auc_sw <- compute_auc_stats(ext_SW$all_probs_matrix, y_ext_bin)
auc_base <- compute_auc_stats(ext_Base$all_probs_matrix, y_ext_bin)
legend_labels <- c(
"Baseline model" = sprintf("Baseline model AUC = %.3f ±%.3f [%.3f–%.3f]",
auc_base$mean, auc_base$sd, auc_base$lo, auc_base$hi),
"Smartwatch model" = sprintf("Smartwatch model AUC = %.3f ±%.3f [%.3f–%.3f]",
auc_sw$mean, auc_sw$sd, auc_sw$lo, auc_sw$hi),
"CGM model" = sprintf("CGM model AUC = %.3f ±%.3f [%.3f–%.3f]",
auc_cgm$mean, auc_cgm$sd, auc_cgm$lo, auc_cgm$hi)
)
model_cols <- c("CGM model" = "#1f77b4", "Smartwatch model" = "#2ca02c",
"Baseline model" = "#d62728")
p_roc <- ggplot() +
geom_ribbon(data = band_df, aes(x = fpr, ymin = lo, ymax = hi, fill = model),
alpha = 0.20, show.legend = FALSE) +
geom_step(data = step_df, aes(x = fpr, y = tpr, colour = model),
linewidth = 1.2, direction = "hv") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed",
colour = "grey50", linewidth = 0.8) +
annotate("text", x = 0.70, y = 0.63, label = "Chance",
angle = 38, colour = "grey50", size = 5.5) +
# Original code hard-coded the full-run iteration count:
# annotate("text", x = 0.02, y = 0.96,
# label = "Shaded band = 95% CI\nacross 200 bootstraps",
# hjust = 0, vjust = 1, size = 5, colour = "grey40") +
# added: report the actual iteration count used by this script.
annotate("text", x = 0.02, y = 0.96,
label = sprintf("Shaded band = 95%% CI\nacross %d bootstraps", n_iter),
hjust = 0, vjust = 1, size = 5, colour = "grey40") +
scale_colour_manual(values = model_cols, labels = legend_labels, name = NULL) +
scale_fill_manual(values = model_cols, guide = "none") +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1),
expand = expansion(mult = 0.01)) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1),
expand = expansion(mult = 0.01)) +
coord_equal() +
labs(title = "A", x = "1 − Specificity (FPR)", y = "Sensitivity (TPR)") +
theme_bw(base_size = 18) +
theme(
plot.title = element_text(face = "bold", hjust = 0, size = 28),
axis.title = element_text(size = 20, face = "bold"),
axis.text = element_text(size = 18),
legend.position = c(0.62, 0.22),
legend.background = element_rect(fill = "white", colour = "grey70", linewidth = 0.5),
legend.text = element_text(size = 14),
legend.spacing.y = unit(6, "pt"),
panel.grid.minor = element_blank()
)
# ==============================================================================
# PANEL B: DECISION CURVE ANALYSIS
# ==============================================================================
# added: keep ensemble probabilities for the DCA panel only. These were
# previously also used for the ROC line, which created the mismatch above.
ext_probs <- list(
CGM = ext_CGM$ensemble_prob,
Smartwatch = ext_SW$ensemble_prob,
Baseline = ext_Base$ensemble_prob
)
nb_fn <- function(y, prob, t) {
pred <- as.integer(prob >= t)
tp <- sum(pred == 1 & y == 1)
fp <- sum(pred == 1 & y == 0)
tp / length(y) - fp / length(y) * t / (1 - t)
}
thresholds <- seq(0.05, 0.50, by = 0.01)
dca_df <- do.call(rbind, lapply(thresholds, function(t) {
data.frame(
threshold = t,
model = c("CGM", "Smartwatch", "Baseline", "Treat all", "Treat none"),
net_benefit = c(
nb_fn(y_ext_bin, ext_probs$CGM, t),
nb_fn(y_ext_bin, ext_probs$Smartwatch, t),
nb_fn(y_ext_bin, ext_probs$Baseline, t),
ir_prev - (1 - ir_prev) * t / (1 - t),
0
)
)
}))
p_dca <- ggplot(dca_df, aes(x = threshold, y = net_benefit,
colour = model, linetype = model)) +
geom_line(linewidth = 1.2) +
scale_colour_manual(
values = c("CGM" = "#1f77b4", "Smartwatch" = "#2ca02c",
"Baseline" = "#d62728", "Treat all" = "#888888",
"Treat none" = "#bbbbbb"),
labels = c("CGM" = "CGM model", "Smartwatch" = "Smartwatch model",
"Baseline" = "Baseline", "Treat all" = "Treat all",
"Treat none" = "Treat none")
) +
scale_linetype_manual(
values = c("CGM" = "solid", "Smartwatch" = "dashed",
"Baseline" = "dotdash", "Treat all" = "dotted",
"Treat none" = "dotted"),
labels = c("CGM" = "CGM model", "Smartwatch" = "Smartwatch model",
"Baseline" = "Baseline", "Treat all" = "Treat all",
"Treat none" = "Treat none")
) +
geom_hline(yintercept = 0, colour = "black", linewidth = 0.6) +
geom_vline(xintercept = ir_prev, colour = "grey40",
linetype = "dashed", linewidth = 0.8, alpha = 0.4) +
annotate("text", x = ir_prev + 0.01, y = 0.38,
label = sprintf("Prevalence\n(%.2f)", ir_prev),
size = 5.5, colour = "grey30", hjust = 0) +
coord_cartesian(xlim = c(0.05, 0.50), ylim = c(-0.06, 0.42)) +
labs(title = "B", x = "Threshold probability", y = "Net benefit",
colour = NULL, linetype = NULL) +
theme_bw(base_size = 18) +
theme(
plot.title = element_text(face = "bold", hjust = 0, size = 28),
axis.title = element_text(size = 20, face = "bold"),
axis.text = element_text(size = 18),
legend.position = "right",
legend.text = element_text(size = 14),
legend.spacing.y = unit(4, "pt"),
panel.grid.minor = element_blank()
)
# COMBINE
fig_final <- p_roc | p_dca
print(fig_final)# Original full-run output filenames:
# ggsave("figure_final_roc_dca.pdf", fig_final, width = 16, height = 7.5, device = cairo_pdf)
# ggsave("figure_final_roc_dca.png", fig_final, width = 16, height = 7.5, dpi = 300)
# added: write separate files so this 5-iteration review run does not overwrite
# the full-run outputs.
ggsave("figure_final_roc_dca_5iter.pdf", fig_final, width = 16, height = 7.5, device = cairo_pdf)
ggsave("figure_final_roc_dca_5iter.png", fig_final, width = 16, height = 7.5, dpi = 300)All analyses complete. Final figure saved as
figure_final_roc_dca_5iter.pdf and
.png