library(inlabru)
library(fmesher)
library(INLA)
library(ggplot2)
library(patchwork)

bru_options_set(inla.mode = "experimental")

The data and the overdispersion that forces a NegBin

mexdolphin <- mexdolphin_sf
mesh       <- mexdolphin$mesh
W          <- 8                       # truncation distance (km)

gs      <- mexdolphin$points$size
gs_mean <- mean(gs)
gs_var  <- var(gs)
gs_vm   <- gs_var / gs_mean

47 detected groups. Group sizes run from 15 to 650 animals, mean 98.9, variance 1.1268^{4}. The variance/mean ratio is 113.9 well above 1, so a Poisson mark would badly understate the spread. This ratio is the whole reason the size distribution, and therefore the abundance variance, needs a NegBin with a finite overdispersion parameter \(\kappa\).

p1 <- ggplot(data.frame(size = gs)) +
  geom_histogram(aes(size), bins = 25, fill = "#2C6E8F",
                 colour = "white", boundary = 0) +
  labs(x = "Group size (animals)", y = "Count",
       title = "Group-size distribution",
       subtitle = paste0("var/mean = ", round(gs_vm, 1),
                         "  \u2192 strong overdispersion")) +
  theme_bw()

p2 <- ggplot() +
  gg(mexdolphin$ppoly) +
  gg(mexdolphin$samplers, colour = "grey70", linewidth = 0.3) +
  gg(mexdolphin$points, aes(colour = size), size = 2, alpha = 0.85) +
  scale_colour_viridis_c(option = "plasma", name = "Group\nsize") +
  coord_sf(datum = fm_crs(mexdolphin$ppoly)) +
  labs(x = NULL, y = NULL, title = "Detected groups by size") +
  theme_bw()

p1 + p2
Left: strongly right-skewed group-size distribution. Right: groups on the study region, coloured by size.

Left: strongly right-skewed group-size distribution. Right: groups on the study region, coloured by size.


hn <- function(distance, sigma) exp(-0.5 * (distance / sigma)^2)

matern <- inla.spde2.pcmatern(mesh,
  prior.sigma = c(2, 0.01), prior.range = c(50, 0.01))
matern_size <- inla.spde2.pcmatern(mesh,
  prior.sigma = c(1, 0.01), prior.range = c(50, 0.01))

cmp <- ~ mySPDE(main = geometry, model = matern) +
  gp_mySPDE(main = geometry, model = matern_size) +
  sigma(1, prec.linear = 1,
        marginal = bm_marginal(qexp, pexp, dexp, rate = 1 / 8)) +
  Intercept(1) +
  gp_Intercept(1)

The mark is a zero-truncated NegBin. nznbinomial is the native family (inlabru \(\ge\) 2.14.1.9007); The point process carries log(2) for the two-sided transect.

fit <- bru(
  components = cmp,

  # Likelihood 1: thinned point process for group locations
  like(
    formula = geometry + distance ~ mySPDE +
      log(hn(distance, sigma)) + Intercept + log(2),
    family = "cp",
    data = mexdolphin$points,
    samplers = mexdolphin$samplers,
    domain = list(geometry = mesh,
                  distance = fm_mesh_1d(seq(0, W, length.out = 30)))
  ),

  # Likelihood 2: zero-truncated NegBin for group sizes (>= 1)
  like(
    formula = size ~ gp_Intercept + gp_mySPDE,
    family  = "nznbinomial",
    data    = mexdolphin$points,
    domain  = list(geometry = mesh)
  ),

  options = bru_options(bru_verbose = FALSE,
                        control.compute = list(dic = TRUE))
)
summary(fit)
## inlabru version: 2.14.1.9007 
## INLA version: 25.03.24 
## Latent components:
## mySPDE: main = spde(geometry)
## sigma: main = linear(1), marginal(NULL)
## Intercept: main = linear(1)
## gp_mySPDE: main = spde(geometry)
## gp_Intercept: main = linear(1)
## Observation models:
##   Model tag: <No tag>
##     Family: 'cp'
##     Data class: 'sf', 'tbl_df', 'tbl', 'data.frame'
##     Response class: 'numeric'
##     Predictor: geometry + distance ~ mySPDE + log(hn(distance, sigma)) + Intercept + log(2)
##     Additive/Linear/Rowwise: FALSE/FALSE/TRUE
##     Used components: effect[mySPDE, sigma, Intercept], latent[] 
##   Model tag: <No tag>
##     Family: 'nznbinomial'
##     Data class: 'sf', 'tbl_df', 'tbl', 'data.frame'
##     Response class: 'numeric'
##     Predictor: size ~ gp_Intercept + gp_mySPDE
##     Additive/Linear/Rowwise: TRUE/TRUE/TRUE
##     Used components: effect[gp_mySPDE, gp_Intercept], latent[] 
## Time used:
##     Pre = 0.467, Running = 1.85, Post = 1.01, Total = 3.33 
## Fixed effects:
##                mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## sigma        -0.050 0.209     -0.461   -0.050      0.360 -0.050   0
## Intercept    -8.180 0.510     -9.405   -8.116     -7.362 -8.044   0
## gp_Intercept  4.475 0.255      3.949    4.479      4.974  4.478   0
## 
## Random effects:
##   Name     Model
##     mySPDE SPDE2 model
##    gp_mySPDE SPDE2 model
## 
## Model hyperparameters:
##                                                       mean      sd 0.025quant
## size for nbinomial_0 zero-inflated observations[2]   3.874   1.116      2.119
## Range for mySPDE                                   292.102 146.466    108.436
## Stdev for mySPDE                                     0.810   0.250      0.417
## Range for gp_mySPDE                                165.453  79.546     62.473
## Stdev for gp_mySPDE                                  0.728   0.152      0.474
##                                                    0.5quant 0.975quant    mode
## size for nbinomial_0 zero-inflated observations[2]    3.729       6.47   3.457
## Range for mySPDE                                    259.684     668.34 206.862
## Stdev for mySPDE                                      0.777       1.39   0.717
## Range for gp_mySPDE                                 148.653     367.55 120.484
## Stdev for gp_mySPDE                                   0.713       1.07   0.682
## 
## Deviance Information Criterion (DIC) ...............: 1258.38
## Deviance Information Criterion (DIC, saturated) ....: 24365.67
## Effective number of parameters .....................: 34.00
## 
## Marginal log-Likelihood:  -655.35 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
# Shared overdispersion kappa.
kappa <- unname(fit$summary.hyperpar[
  grep("size for.*nbinomial", rownames(fit$summary.hyperpar))[1], "mean"])
kappa
## [1] 3.874478
pxl <- fm_pixels(mesh, dims = c(200, 100), mask = mexdolphin$ppoly)

# group intensity (groups/km^2)
pr.int <- predict(fit, pxl, ~ exp(mySPDE + Intercept))

# expected group size: ZERO-TRUNCATED mean mu_T = mu / (1 - p0)
pr.size <- predict(fit, pxl, ~ {
  mu <- exp(gp_Intercept + gp_mySPDE)
  mu / (1 - (kappa / (kappa + mu))^kappa)
})

# animal density D = lambda_groups * mu_T
pr.dens <- predict(fit, pxl, ~ {
  lambda <- exp(mySPDE + Intercept)
  mu     <- exp(gp_Intercept + gp_mySPDE)
  lambda * mu / (1 - (kappa / (kappa + mu))^kappa)
})
ggplot() +
  gg(pr.int, geom = "tile") +
  gg(mexdolphin$ppoly, alpha = 0, linewidth = 0.8) +
  gg(mexdolphin$points, size = 0.5, alpha = 0.8) +
  scale_fill_viridis_c(option = "viridis", name = "Groups\n/km\u00b2") +
  labs(x = NULL, y = NULL, title = "Group intensity") +
  theme_bw()
Group intensity (groups/km^2).

Group intensity (groups/km^2).

ggplot() +
  gg(pr.dens, geom = "tile") +
  gg(mexdolphin$ppoly, alpha = 0, linewidth = 0.8) +
  gg(mexdolphin$points, size = 0.5, alpha = 0.8) +
  scale_fill_viridis_c(option = "viridis", name = "Animals\n/km\u00b2") +
  labs(x = NULL, y = NULL,
       title = "Animal density",
       subtitle = expression(D(s) == lambda[groups](s) %*% mu[T](s))) +
  theme_bw()
Animal density (animals/km^2) = group intensity x zero-truncated expected group size.

Animal density (animals/km^2) = group intensity x zero-truncated expected group size.

ggplot() +
  gg(pr.dens, geom = "tile", aes(fill = sd / mean)) +
  gg(mexdolphin$ppoly, alpha = 0, linewidth = 0.8) +
  scale_fill_viridis_c(option = "cividis", name = "CV") +
  labs(x = NULL, y = NULL,
       title = "Density-surface uncertainty (CV of the estimated rate)")+
  theme_bw()
Per-pixel CV of the density SURFACE (posterior sd/mean). This is estimation uncertainty of the mean surface only; it is NOT the abundance CV (see below).

Per-pixel CV of the density SURFACE (posterior sd/mean). This is estimation uncertainty of the mean surface only; it is NOT the abundance CV (see below).


The variance of total abundance

Total abundance is the sum of the sizes of all groups in the region:

\[ N \;=\; \sum_{k:\, \mathbf{y}_k \in A} G_k , \]

where the group locations \(\{\mathbf{y}_k\}\) are a point process with intensity \(\lambda_{\text{groups}}(\mathbf{s})\) and each \(G_k\) is a zero-truncated NegBin group size. Two independent sources of randomness are stacked here:

  1. how many groups there are and where the point process;
  2. how big each group is the mark.

This is a compound point-process sum. Its variance is not \(\text{Var}\!\left(\int \lambda\,\mu_T\right)\); that would only be the variance of its mean.

Abundance \(N\) is a realised count. Even with the fields known perfectly, \(N\) is still random, because groups still occur as a random process and still carry random sizes. That extra, physical randomness is the compound term.

Law of total variance

Condition on the latent fields \(\boldsymbol{\theta}\) (all SPDE fields, intercepts, \(\sigma\)):

\[ \text{Var}(N) = \underbrace{\mathbb{E}\!\left[\text{Var}(N \mid \boldsymbol{\theta})\right]}_{\text{Term 1: within-field, the compound process}} + \underbrace{\text{Var}\!\left[\mathbb{E}(N \mid \boldsymbol{\theta})\right]}_{\text{Term 2: field / estimation uncertainty}} . \]

The two conditional moments

Given the fields, the group process is Poisson, so the compound-sum identities give a conditional mean and a conditional variance that are both integrals over space:

\[ \mathbb{E}(N \mid \boldsymbol{\theta}) = \int_A \lambda_{\text{groups}}(\mathbf{s})\, \mathbb{E}_T[G \mid \mathbf{s}]\, d\mathbf{s}, \]

\[ \text{Var}(N \mid \boldsymbol{\theta}) = \int_A \lambda_{\text{groups}}(\mathbf{s})\, \mathbb{E}_T[G^2 \mid \mathbf{s}]\, d\mathbf{s}. \]

The conditional variance is the compound-Poisson result: variance of a Poisson sum equals the rate times the second momen* of the jumps. This is why \(\mathbb{E}_T[G^2]\) — not just the mean — enters, and why overdispersion inflates the abundance variance so strongly.

Zero-truncated NegBin moments

Let \(\mu(\mathbf{s}) = \exp(\gamma + \zeta(\mathbf{s}))\) be the untruncated NegBin mean from the linear predictor, and \(\kappa\) the size (overdispersion) parameter. The untruncated zero probability is

\[ p_0(\mathbf{s}) = \left(\frac{\kappa}{\kappa + \mu(\mathbf{s})}\right)^{\!\kappa}. \]

Because a value of \(0\) contributes nothing to either raw moment, truncating away the zeros just renormalises by \(1 - p_0\):

\[ \mathbb{E}_T[G \mid \mathbf{s}] = \frac{\mu}{1 - p_0}, \qquad \mathbb{E}_T[G^2 \mid \mathbf{s}] = \frac{\mu + \mu^2\!\left(1 + \tfrac{1}{\kappa}\right)}{1 - p_0}. \]

(The untruncated second moment \(\mu + \mu^2(1 + 1/\kappa)\) follows from \(\text{Var} = \mu + \mu^2/\kappa\) and \(\mathbb{E}[G^2] = \text{Var} + \mu^2\).) The \(1/\kappa\) term is where overdispersion enters \(\mathbb{E}_T[G^2]\): smaller \(\kappa\) (more overdispersion) means a much larger second moment and therefore a much larger Term 1.

Putting it together

\[ \boxed{\; \text{Var}(N) = \mathbb{E}\!\left[\int_A \lambda\, \mathbb{E}_T[G^2]\, d\mathbf{s}\right] + \text{Var}\!\left[\int_A \lambda\, \mathbb{E}_T[G]\, d\mathbf{s}\right] \;} \]

Estimate both by Monte Carlo over posterior samples: for each sample evaluate the two integrals; Term 1 is the mean of the \(\int\lambda\,\mathbb{E}_T[G^2]\) values, Term 2 is the variance of the \(\int\lambda\,\mathbb{E}_T[G]\) values.


Computing it

predpts <- fm_int(mesh, mexdolphin$ppoly, format = "sf")

post <- predict(
  fit, predpts,
  ~ {
    lambda <- exp(mySPDE + Intercept)
    mu     <- exp(gp_Intercept + gp_mySPDE)
    p0     <- (kappa / (kappa + mu))^kappa
    mu_t   <- mu / (1 - p0)                          # E_T[G]
    EG2    <- (mu + mu^2 * (1 + 1 / kappa)) / (1 - p0)  # E_T[G^2]
    c(expect   = sum(weight * lambda * mu_t),   # E(N | theta)
      variance = sum(weight * lambda * EG2))    # Var(N | theta)
  },
  n.samples = 2000
)
EN        <- post["expect",   "mean"]                 # E[N]
term1     <- post["variance", "mean"]                 # E[ Var(N|theta) ]  (compound)
term2     <- post["expect",   "sd"]^2                 # Var[ E(N|theta) ]  (field)
VarN      <- term1 + term2
sdN       <- sqrt(VarN)
cvN       <- sdN / EN

results <- data.frame(
  Quantity = c("E[N]", "Term 1  (compound, within-field)",
               "Term 2  (field / estimation)",
               "Var(N) total", "SD(N)", "CV(N)",
               "95% CI lower", "95% CI upper"),
  Value = c(round(EN), round(term1), round(term2),
            round(VarN), round(sdN), round(cvN, 3),
            round(EN - 1.96 * sdN), round(EN + 1.96 * sdN))
)
results
Quantity Value
E[N] 29414.000
Term 1 (compound, within-field) 9182576.000
Term 2 (field / estimation) 76633791.000
Var(N) total 85816367.000
SD(N) 9264.000
CV(N) 0.315
95% CI lower 11258.000
95% CI upper 47571.000