The highest (posterior) density interval (HPDI) for a sample of values is narrowest interval that contains x% of the data. The 90% HPDI would be the smallest interval that contains 90% of the data.

What’s the 90% HPDI for these values?

library(dplyr, warn.conflicts = FALSE)

xs <- 1:20
xs
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

Using existing implementations, we get…

xs <- 1:20

hpdi1 <- HDInterval::hdi(xs, credMass = .9)
hpdi1
#> lower upper 
#>     1    19 
#> attr(,"credMass")
#> [1] 0.9

hpdi2 <- coda::HPDinterval(coda::as.mcmc(xs), .9)
hpdi2
#>      lower upper
#> var1     1    19
#> attr(,"Probability")
#> [1] 0.9

But these intervals contain more than 90% of the data.

within <- function(xs, range) {
  min(range) <= xs & xs <= max(range)
}

mean(within(xs, hpdi1))
#> [1] 0.95
mean(within(xs, hpdi2))
#> [1] 0.95

So I rolled my own version. The algorithm for finding the HPDI involves looking at all intervals with x% of the data, and keeping the one with the smallest interval. That’s what I do here.

compute_hpdi <- function(xs, prob = .9) {
  x_sorted <- sort(xs)
  n <- length(xs)

  num_to_keep <- ceiling(prob * n)
  num_to_drop <- n - num_to_keep

  possible_starts <- seq(1, num_to_drop + 1, by = 1)
  # Just count down from the other end
  possible_ends <- rev(seq(from = n, length = num_to_drop + 1, by = -1))

  # Find smallest interval
  span <- x_sorted[possible_ends] - x_sorted[possible_starts]
  edge <- which.min(span)
  edges <- c(possible_starts[edge], possible_ends[edge])

  # My requirement: length of span interval must be same as number to keep.
  # Other methods produce intervals that are 1 longer.
  stopifnot(length(edges[1]:edges[2]) == num_to_keep)
  
  x_sorted[edges]
}

compute_hpdi(xs)
#> [1]  1 18
mean(within(xs, compute_hpdi(xs)))
#> [1] 0.9

That seems more correct to me.

Bunch of tests where my version does best

compare_methods <- function(xs, prob = .9) {
  x <- rlang::f_label(rlang::enquo(xs))
  hpdi1 <- HDInterval::hdi(xs, credMass = prob)
  hpdi2 <- coda::HPDinterval(coda::as.mcmc(xs), prob)
  hpdi3 <- compute_hpdi(xs, prob)
  
  tibble::tribble(
    ~ x, ~ Method, ~ lower, ~ upper, ~ coverage, ~ target,
    x, "HDInterval", min(hpdi1), max(hpdi1), mean(within(xs, hpdi1)), prob,
    x, "coda", min(hpdi2), max(hpdi2), mean(within(xs, hpdi2)), prob,
    x, "TJ", min(hpdi3), max(hpdi3), mean(within(xs, hpdi3)), prob
  ) %>% 
    mutate(span = upper - lower)
}

compare_methods(rnorm(20))
#> # A tibble: 3 x 7
#>             x     Method      lower    upper coverage target     span
#>         <chr>      <chr>      <dbl>    <dbl>    <dbl>  <dbl>    <dbl>
#> 1 `rnorm(20)` HDInterval -0.7010839 1.368485     0.95    0.9 2.069569
#> 2 `rnorm(20)`       coda -0.7010839 1.368485     0.95    0.9 2.069569
#> 3 `rnorm(20)`         TJ -0.7010839 1.279017     0.90    0.9 1.980101
compare_methods(rnorm(200))
#> # A tibble: 3 x 7
#>              x     Method     lower    upper coverage target     span
#>          <chr>      <chr>     <dbl>    <dbl>    <dbl>  <dbl>    <dbl>
#> 1 `rnorm(200)` HDInterval -1.798850 1.260706    0.905    0.9 3.059555
#> 2 `rnorm(200)`       coda -1.798850 1.260706    0.905    0.9 3.059555
#> 3 `rnorm(200)`         TJ -1.739833 1.260706    0.900    0.9 3.000538
compare_methods(rnorm(200000))
#> # A tibble: 3 x 7
#>                x     Method     lower    upper coverage target     span
#>            <chr>      <chr>     <dbl>    <dbl>    <dbl>  <dbl>    <dbl>
#> 1 `rnorm(2e+05)` HDInterval -1.663529 1.629114 0.900005    0.9 3.292643
#> 2 `rnorm(2e+05)`       coda -1.663529 1.629114 0.900005    0.9 3.292643
#> 3 `rnorm(2e+05)`         TJ -1.677298 1.615311 0.900000    0.9 3.292609

compare_methods(iris$Sepal.Length)
#> # A tibble: 3 x 7
#>                     x     Method lower upper  coverage target  span
#>                 <chr>      <chr> <dbl> <dbl>     <dbl>  <dbl> <dbl>
#> 1 `iris$Sepal.Length` HDInterval   4.4   6.9 0.9066667    0.9   2.5
#> 2 `iris$Sepal.Length`       coda   4.4   6.9 0.9066667    0.9   2.5
#> 3 `iris$Sepal.Length`         TJ   4.4   6.9 0.9066667    0.9   2.5
compare_methods(iris$Sepal.Width)
#> # A tibble: 3 x 7
#>                    x     Method lower upper  coverage target  span
#>                <chr>      <chr> <dbl> <dbl>     <dbl>  <dbl> <dbl>
#> 1 `iris$Sepal.Width` HDInterval   2.4   3.8 0.9066667    0.9   1.4
#> 2 `iris$Sepal.Width`       coda   2.4   3.8 0.9066667    0.9   1.4
#> 3 `iris$Sepal.Width`         TJ   2.4   3.8 0.9066667    0.9   1.4

compare_methods(mtcars$mpg)
#> # A tibble: 3 x 7
#>              x     Method lower upper coverage target  span
#>          <chr>      <chr> <dbl> <dbl>    <dbl>  <dbl> <dbl>
#> 1 `mtcars$mpg` HDInterval  13.3  32.4  0.90625    0.9  19.1
#> 2 `mtcars$mpg`       coda  10.4  30.4  0.93750    0.9  20.0
#> 3 `mtcars$mpg`         TJ  13.3  32.4  0.90625    0.9  19.1
compare_methods(mtcars$mpg, .09312)
#> # A tibble: 3 x 7
#>              x     Method lower upper coverage  target  span
#>          <chr>      <chr> <dbl> <dbl>    <dbl>   <dbl> <dbl>
#> 1 `mtcars$mpg` HDInterval  21.4  21.5  0.09375 0.09312   0.1
#> 2 `mtcars$mpg`       coda  21.0  21.4  0.12500 0.09312   0.4
#> 3 `mtcars$mpg`         TJ  21.4  21.5  0.09375 0.09312   0.1

compare_methods(mtcars$wt)
#> # A tibble: 3 x 7
#>             x     Method lower upper coverage target  span
#>         <chr>      <chr> <dbl> <dbl>    <dbl>  <dbl> <dbl>
#> 1 `mtcars$wt` HDInterval 1.513 4.070  0.90625    0.9 2.557
#> 2 `mtcars$wt`       coda 1.835 5.424  0.93750    0.9 3.589
#> 3 `mtcars$wt`         TJ 1.513 4.070  0.90625    0.9 2.557
compare_methods(mtcars$wt, .5)
#> # A tibble: 3 x 7
#>             x     Method lower upper coverage target  span
#>         <chr>      <chr> <dbl> <dbl>    <dbl>  <dbl> <dbl>
#> 1 `mtcars$wt` HDInterval  3.15 4.070  0.53125    0.5 0.920
#> 2 `mtcars$wt`       coda  3.15 4.070  0.53125    0.5 0.920
#> 3 `mtcars$wt`         TJ  3.15 3.845  0.50000    0.5 0.695