# Re-run a coarser grid (N × k) for the heatmap — use fewer reps
n_hm_reps <- 10
ss_hm_sizes <- ss_sizes
items_hm <- seq(10, ni_total, by = 10) # 10, 20, 30, 40
set.seed(999)
hm_results <- do.call(rbind, lapply(ss_hm_sizes, function(n_s) {
do.call(rbind, lapply(items_hm, function(k) {
do.call(rbind, lapply(seq_len(n_hm_reps), function(r) {
att_idx <- which(dat$is_non_attender == 0)
non_idx <- which(dat$is_non_attender == 1)
prop_non <- mean(dat$is_non_attender == 1)
n_non <- round(n_s * prop_non)
n_att <- n_s - n_non
row_idx <- c(
sample(att_idx, min(n_att, length(att_idx)), replace = FALSE),
sample(non_idx, min(n_non, length(non_idx)), replace = FALSE)
)
if (length(row_idx) < 20) return(NULL)
item_idx <- sample(seq_len(ni_total), k, replace = FALSE)
sub_rt <- rt_mat[row_idx, ][, item_idx, drop = FALSE]
sub_cx <- cx_all[item_idx]
truth <- (dat$is_non_attender == 1)[row_idx]
n_att_s <- sum(!truth)
if (n_att_s < 5 || sum(truth) < 5) return(NULL)
cx_rk_hm <- rank(sub_cx)
rho_hm <- apply(sub_rt, 1, function(rt_i)
cor(cx_rk_hm, rank(rt_i), method = "spearman"))
wtau_hm <- wtau_vectorised(sub_rt, sub_cx)
kneedle_stats <- function(signal, truth) {
signal[!is.finite(signal)] <- NA
sorted <- sort(signal, na.last = NA)
if (length(sorted) < 2)
return(list(sens = NA, spec = NA, fp_pct = NA, fn_pct = NA))
k_i <- kneedle_fn(sorted, sign = -1)
flag <- signal > sorted[k_i]
cs <- confusion_stats(flag, truth)
n_att_loc <- sum(!truth)
n_non_loc <- sum(truth)
list(sens = cs$Sensitivity,
spec = cs$Specificity,
fp_pct = if (n_att_loc > 0) 100 * cs$FP / n_att_loc else NA,
fn_pct = if (n_non_loc > 0) 100 * cs$FN / n_non_loc else NA)
}
s_rho <- kneedle_stats(-rho_hm, truth)
s_wtau <- kneedle_stats(-wtau_hm, truth)
data.frame(
n = n_s, k = k,
sens_rho = s_rho$sens, sens_wtau = s_wtau$sens,
spec_rho = s_rho$spec, spec_wtau = s_wtau$spec,
fp_pct_rho = s_rho$fp_pct, fp_pct_wtau = s_wtau$fp_pct,
fn_pct_rho = s_rho$fn_pct, fn_pct_wtau = s_wtau$fn_pct
)
}))
}))
}))
hm_summary <- hm_results |>
group_by(n, k) |>
summarise(
sens_rho = round(mean(sens_rho, na.rm = TRUE), 3),
sens_wtau = round(mean(sens_wtau, na.rm = TRUE), 3),
spec_rho = round(mean(spec_rho, na.rm = TRUE), 3),
spec_wtau = round(mean(spec_wtau, na.rm = TRUE), 3),
fp_pct_rho = round(mean(fp_pct_rho, na.rm = TRUE), 1),
fp_pct_wtau = round(mean(fp_pct_wtau, na.rm = TRUE), 1),
fn_pct_rho = round(mean(fn_pct_rho, na.rm = TRUE), 1),
fn_pct_wtau = round(mean(fn_pct_wtau, na.rm = TRUE), 1),
.groups = "drop"
)
fmt_label_y <- function(x) formatC(as.numeric(x), format = "d", big.mark = ",")
make_heatmap <- function(data, rho_col, wtau_col, metric_label,
fill_label, low_col, mid_col, high_col,
midpoint_val, fmt = "%.2f") {
long_df <- data |>
select(n, k, rho = all_of(rho_col), wtau = all_of(wtau_col)) |>
tidyr::pivot_longer(cols = c(rho, wtau),
names_to = "feature", values_to = "value") |>
mutate(feature = ifelse(feature == "rho", "Spearman \u03c1", "Weighted \u03c4"))
ggplot(long_df, aes(x = factor(k), y = factor(n), fill = value)) +
geom_tile(colour = "white", linewidth = 0.5) +
geom_text(aes(label = sprintf(fmt, value)), size = 2.6) +
scale_fill_gradient2(low = low_col, mid = mid_col, high = high_col,
midpoint = midpoint_val, na.value = "grey85",
name = fill_label) +
facet_wrap(~ feature, ncol = 2) +
scale_y_discrete(labels = fmt_label_y) +
labs(title = paste(metric_label, "heatmap: N \u00d7 items retained"),
x = "Items retained (k)", y = "Sample size (N)") +
theme(axis.text.x = element_text(angle = 0),
legend.position = "right")
}
p_hm_sens <- make_heatmap(hm_summary, "sens_rho", "sens_wtau",
"Sensitivity", "Mean\nSensitivity",
"tomato", "lightyellow", "#2e7d32", 0.5)
p_hm_spec <- make_heatmap(hm_summary, "spec_rho", "spec_wtau",
"Specificity", "Mean\nSpecificity",
"tomato", "lightyellow", "#2e7d32", 0.5)
p_hm_fp <- make_heatmap(hm_summary, "fp_pct_rho", "fp_pct_wtau",
"False Positive Rate (%)", "Mean FP\nRate (%)",
"#2e7d32", "lightyellow", "tomato", 50, fmt = "%.1f%%")
p_hm_fn <- make_heatmap(hm_summary, "fn_pct_rho", "fn_pct_wtau",
"False Negative Rate (%)", "Mean FN\nRate (%)",
"#2e7d32", "lightyellow", "tomato", 50, fmt = "%.1f%%")
p_hm_sens / p_hm_spec / p_hm_fp / p_hm_fn