library(tidyverse)Null threshold for Precision@R and mAP
Set up experiment
m <- 4 # the number of white balls in the urn (replicates)
n <- 32 # the number of black balls in the urn (negative controls)
k <- m # the number of balls drawn from the urn, hence must be in 0,1,…, m+n (top-k)
nn <- 10000 # number of iterationsCreate indicator vector (top-k are TRUE, i.e. white)
y <- as.factor(seq(m + n) <= k)
y_rank <- 1 - (seq(m + n) / (m + n))Generate many samples of precision at k (by random sampling of labels)
retrieval_metrics_empirical <-
map_df(seq(nn), function(i) {
x <- as.factor(sample(c(rep(FALSE, n), rep(TRUE, m))))
pr_at_k_l <-
yardstick::precision_vec(x, y, event_level = "second")
average_precision_l <-
yardstick::average_precision_vec(x, y_rank, event_level = "second")
tibble(pr_at_k_l, average_precision_l)
})retrieval_metrics_empirical %>%
ggplot(aes(pr_at_k_l, average_precision_l)) +
geom_point()Precision@K (under the null hypothesis) follows the hypergeometric distribution
Now sample from the hypergeometric distribution
pr_at_k_l <- rhyper(
nn = nn,
m = m,
n = n,
k = k
) / k
dist_simulation_pr_at_k <-
data.frame(pr_at_k_l)Compute thresholds
thresholds <-
tribble(
~name,
~value,
"m / (m + n)",
m / (m + n),
"empirical mean",
mean(retrieval_metrics_empirical$pr_at_k_l),
"hyper 95pct",
qhyper(
p = 0.95,
m = m,
n = n,
k = k
) / k
)
thresholdsPlot both samples; they should look very similar.
bind_rows(
retrieval_metrics_empirical %>% select(pr_at_k_l) %>% mutate(type = "empirical"),
dist_simulation_pr_at_k %>% mutate(type = "simulation")
) %>%
ggplot(aes(pr_at_k_l)) +
geom_histogram(binwidth = .01) +
facet_wrap( ~ type, ncol = 1) +
geom_vline(
mapping = aes(xintercept = value,
colour = name),
data = thresholds,
show.legend = TRUE
) +
theme_bw()We are using \frac{m}{m+n}, i.e., the mean of the hypergeometric distribution as the null threshold. But we could instead use some higher quantile e.g. the 95th percentile as the null threshold.
This is easy to do for Pr@K but the story is a little more complicated for Average Precision, because there is likely no closed form for the distribution that AP follows.
(Bestgen 2015) provides an iterative process to compute the mean but it’s not clear how to extend this to a given percentile
RandomAPExact <-
function(m = 0, n = 0) {
ap <- 0
for (x in 1:m) {
for (k in x:(n + x)) {
ap <- ap + dhyper(x = x, m = m, n = n, k = k) * (x / k) * (x / k)
}
}
ap <- ap / m
ap
}However, we could instead use an empirical estimate for the the 95th percentile:
thresholds_ap_pr_at_k <-
tribble(
~name,
~value,
"pr_at_k_l",
quantile(retrieval_metrics_empirical$pr_at_k_l, .95, names = FALSE),
"average_precision_l",
quantile(retrieval_metrics_empirical$average_precision_l, .95, names = FALSE),
"pr_at_k_l",
mean(retrieval_metrics_empirical$pr_at_k_l),
"average_precision_l",
mean(retrieval_metrics_empirical$average_precision_l)
)
thresholds_ap_pr_at_kretrieval_metrics_empirical %>%
pivot_longer(everything()) %>%
ggplot(aes(value)) +
geom_histogram(bins = 50) +
geom_vline(
mapping = aes(xintercept = value,
colour = name),
data = thresholds_ap_pr_at_k,
show.legend = TRUE
) +
facet_wrap( ~ name, ncol = 1) +
theme_bw() +
labs(caption = "Vertical lines indicate mean and 95th percentile")