Bayesian Conditional Random Fields

Joint species distribution models (JSDMs) typically either estimate residual species association networks or induce correlations among species using latent variables. These models usually do not examine direct effects (i.e. the conditional effect of one species' presence on another's occurrence probability), nor do they allow these effects to vary along environmental gradients. Conditional Random Fields (CRFs) overcome these limitations by estimating a joint Markov network, but they are costly to estimate and existing approaches (MRFcov for example) do not formulate a generative model that can make predictions for unsampled areas. This document describes an approach to build such a generative CRF model in a Bayesian framework. Herein, conditional co-occurrence associations (direct effects of one species on another) are jointly estimated, as are effects that represent how these pairwise associations change along environemental gradients. To overcome the issue of out-of-sample prediction, a set of latent variables is used to capture variance associated with co-occurrence effects when simulating new data from the model, borrowing from ideas used by many JSDM frameworks (i.e. Boral, HMSC, manyglm). This is very much a work-in-progress, but the following working example demonstrates that the idea is feasible and insightful.

First we simulate binary data that reflects a fixed set of direct and indirect co-occurrence parameters. Simulate the symmetric direct co-occurrence effect matrix. Here, species 1 and 2, and species 1 and 3, show direct co-occurrence effects; all other pairwise combinations are 0

beta_cooccur <- matrix(0, 4, 4)
beta_cooccur[1, 2] <- beta_cooccur[2, 1] <- 1.55
beta_cooccur[1, 3] <- beta_cooccur[3, 1] <- -0.8
diag(beta_cooccur) <- 1
beta_cooccur

##       [,1] [,2] [,3] [,4]
## [1,]  1.00 1.55 -0.8    0
## [2,]  1.55 1.00  0.0    0
## [3,] -0.80 0.00  1.0    0
## [4,]  0.00 0.00  0.0    1

Next simulate varying associations across the environment for species 1 vs 2. This is done in an array in case we wish to include more covariate-varying associations in later simulations

beta_x_ind <- array(0, dim = c(1, 4, 4))
beta_x_ind[1, 1, 2] <- beta_x_ind[1, 2, 1] <- -0.65
diag(beta_x_ind[1, , ]) <- 0
beta_x_ind[1, , ]

##       [,1]  [,2] [,3] [,4]
## [1,]  0.00 -0.65    0    0
## [2,] -0.65  0.00    0    0
## [3,]  0.00  0.00    0    0
## [4,]  0.00  0.00    0    0

Set simulation parameters

set.seed(10)
n_obs <- 500
alphas <- runif(ncol(beta_cooccur), -1.75, 
    1.75)
betas <- runif(ncol(beta_cooccur), -0.65, 
    0.65)
env <- sort(rnorm(n_obs))

Estimate the latent variables per species

xs <- matrix(rnorm(NCOL(beta_cooccur) * n_obs), 
    ncol = NCOL(beta_cooccur))

Post adjust the linear predictors using the simulated co-occurrence effects

data <- xs %*% beta_cooccur + env * xs %*% 
    beta_x_ind[1, , ]

Convert the linear predictors to probabilities using inverse logit, and take Bernoulli draws

env_adj <- matrix(NA, nrow = n_obs, ncol = NCOL(beta_cooccur))
for (i in 1:n_obs) {
    env_adj[i, ] <- xs[i, ] %*% ((env[i] * 
        beta_x_ind[1, , ]) + beta_cooccur)
}

data <- matrix(NA, nrow = n_obs, ncol = NCOL(beta_cooccur))
for (i in 1:NCOL(beta_cooccur)) {
    data[, i] <- alphas[i] + env_adj[, i] + 
        env * betas[i]
}
head(data)

##           [,1]     [,2]        [,3]       [,4]
## [1,]  1.617618 2.950001  0.03931257 1.53659187
## [2,]  4.316053 5.910665 -1.27115694 0.35504440
## [3,]  1.863015 3.738819 -0.97043882 0.04673351
## [4,]  4.671375 1.306840  0.27000264 1.44096526
## [5,] -0.711041 4.011945 -0.96610964 0.46061320
## [6,]  5.006869 4.538369  0.08552669 4.99545544

Convert to binary observations using Bernoulli draws; calculate observed prevalences

data <- data.frame(apply(data, 2, function(x) rbinom(n_obs, 
    1, boot::inv.logit(x))))
colnames(data) <- paste0("sp", seq_len(NCOL(data)))
data$env <- env
apply(data[, 1:NCOL(beta_cooccur)], 2, function(x) sum(x)/length(x))

##   sp1   sp2   sp3   sp4 
## 0.524 0.388 0.432 0.616

Create the predictor matrix for any covariates that are being scrutinised for their possible effects on co-occurrence associations; this can be scaled up to handle more covariates but we will just stick to the propZos covariate for this example

n_covariates <- 1
n_nodes <- NCOL(beta_cooccur)
x_direct <- matrix(NA, nrow = NROW(data), 
    ncol = n_covariates)
for (i in 1:n_covariates) {
    x_direct[, i] <- as.matrix(data)[, (n_nodes + 
        i)]
}
head(x_direct)

##           [,1]
## [1,] -3.001431
## [2,] -2.518335
## [3,] -2.436616
## [4,] -2.329880
## [5,] -2.321017
## [6,] -2.205472

Describe the Bayesian model using the probabilistic programming language JAGS

model_code <- "
model
{
  
#### Marginal likelihoods ####
  for (i in 1:N) {
   for (node in 1:N_nodes){
     y_marginal[i, node] ~ dbern(mu_cooccur[i, node])
     # Missing values are predicted using random intercepts, covariates and an error term
     # to capture the missing co-occurrence effects;
     # in essence, these are marginal predictions that can be used to simulate
     # Bernoulli draws for each species in unsampled areas
     logit(mu_cooccur[i, node]) <- alpha[node] + 
                                   x_direct[i, ] %*% beta_x_dir[, node] +
                                   error[i, node]
   }
  }

# Latent factors capture missing species association terms from the CRF model below.
# Note that we do not explicitly model these as multivariate normal, BUT they can still 
# demonstrate complex posterior associations with one another and with our measured covariates. This 
# is useful as the latent factors can be conditionally predicted out-of-sample (they will not 
# necessarily act as white noise when predicting for new areas) because of these associations
# with measured covariates. Also note that for larger sets of factors the variances can be 
# progressively penalised as in mvgam, but this isn't necessary in the simple example
  for (i in 1:N){
   for (j in 1:N_lv){
     LV[i, j] ~ dnorm(0, 1)
   }
  }

# Latent factor loadings: standard normal with identifiability constraints;
# these could be regularised using ddexp(0, lambda) if need be, but we don't need 
# them to necessarily be sparse. It is the cumulative 'error' term that is of interest,
# not the actual loadings. The upper triangle of loading matrix set to zero
  for (j in 1:(N_lv - 1)){
   for (j2 in (j + 1):N_lv){
    lv_coefs[j, j2] <- 0
  }
 }
  
# Positive constraints on loading diagonals
  for (j in 1:N_lv) {
   lv_coefs[j, j] ~ dnorm(0, 1)T(0, 1);
 }
  
# Lower diagonal free
  for (j in 2:N_lv){
   for (j2 in 1:(j - 1)){
    lv_coefs[j, j2] ~ dnorm(0, 1)T(-1, 1);
  }
 }
  
# Other elements also free
  for (j in (N_lv + 1):N_nodes) {
   for (j2 in 1:N_lv){
    lv_coefs[j, j2] ~ dnorm(0, 1)T(-1, 1);
  }
 }
  
# 'error' term for each species depends on latent factor associations
  for (i in 1:N){
   for (node in 1:N_nodes){
    error[i, node] <- inprod(lv_coefs[node,], LV[i,])
  }
 }

#### Design matrix construction for the CRF model ####
# Direct effect (co-occurrence) linear predictor contributions
  y_dir_linpred[1:N, 1:N_nodes] <- y_marginal %*% beta_cooccur

# Indirect covariate effects are modelled as interactions between marginal effects
# and covariates; set these up as an array to scale to >1 covariate
  for (i in 1:N){
   for (cov in 1:N_covariates){
    x_indirect[cov, i, 1:N_nodes] <- x_direct[i, cov] * y_marginal[i, ]
   }
 }
  
#### CRF model replaces 'error' above with direct and indirect co-occurrence effects ####
  for (i in 1:N) {
   for (node in 1:N_nodes){
     ys[i, node] ~ dbern(mu[i, node]);
     
     # Linear pedictor is a function of:
     # species-specific hierarchical intercepts;
     # fixed additive covariate effects;
     # direct co-occurrence effects;
     # interactions between covariates and co-occurrence effects
     logit(mu[i, node]) <- alpha[node] + 
                           x_direct[i, ] %*% beta_x_dir[, node] +
                           y_dir_linpred[i, node] +
                           x_indirect[, i, ] %*% beta_x_ind[, node, ]
  }
 }

#### Posterior simulations of outcomes ####
  for (i in 1:N) {
   for (node in 1:N_nodes){
     ypreds[i, node] ~ dbern(mu[i, node]);
   }
  }

#### Priors ####
# Hierarchical intercepts
   for (node in 1:N_nodes){
    # non-centred parameterisation sometimes needed #
    # alpha_raw[node] ~ dnorm(0, 1) #
    # alpha[node] <- mu_alpha + (sigma_alpha * alpha_raw[node]) #
    alpha[node] ~ dnorm(mu_alpha, tau_alpha)
  }
mu_alpha ~ dnorm(-1, 1)
sigma_alpha ~ dnorm(0, 1)T(0, )
tau_alpha <- pow(sigma_alpha, -2)

# Direct covariate effects; L1 regularised currently, but these could easily be modelled
# using a multivariate normal with scaled Wishart prior for the precision to capture any 
# species dependencies in environmental responses
  for (node in 1:N_nodes){
   for (cov in 1:N_covariates){
    beta_x_dir[cov, node] ~ ddexp(0, lambda)
   }
  }

# Direct effects (co-occurrence associations); L1 regularised
   for (node_a in 1:N_nodes) {
    # Diagonal elements are zero
    beta_cooccur[node_a, node_a] <- 0
  
    # Off-diagonal elements are symmetric
     for (node_b in (node_a + 1):N_nodes) {
      beta_cooccur[node_a, node_b] ~ ddexp(0, lambda)
      beta_cooccur[node_b, node_a] <- beta_cooccur[node_a, node_b]
   }
  }

# Indirect covariate effects; L1 regularised
  for(node_a in 1:N_nodes) {
   for (cov in 1:N_covariates){
    beta_x_ind[cov, node_a, node_a] <- 0
    for(node_b in (node_a + 1):N_nodes) {
      beta_x_ind[cov, node_a, node_b] ~ ddexp(0, lambda)
      beta_x_ind[cov, node_b, node_a] <- beta_x_ind[cov, node_a, node_b]
    }
   }
  }

# LASSO parameter is complexity-penalising; pulls toward
# larger penalties unless the data support smaller ones
lambda_raw ~ dbeta(3, 1)
lambda <- lambda_raw * 15
  
}
"

Prepare the data for JAGS; note how the observed binary occurrence matrix (ys) is supplied as data for the outcome AND for the y_marginal effect (co-occurrence) predictor matrix; this allows each species' presence/absence to act as a predictor of each other species' occurrence probability; It also satisfies the full generative capabilities we require, as marginal predictions can be made for unsampled areas using a set of latent variables that are constrained to represent out-of-sample co-occurrence effects. So in a way this model combines the most useful properties from latent variable JSDMs such as HMSC (inducing species' correlations using latent variables that themselves can have complex posterior associations with measured covariates to improve out of sample predictions) as well as CRFs (estimating direct co-occurrence effects and how these effects change along environmental gradients)

model_data <- list(N = NROW(data), 
                   ys = as.matrix(data[,1:NCOL(beta_cooccur)]),
                   y_marginal = as.matrix(data[,1:NCOL(beta_cooccur)]),
                   N_covariates = 1,
                   x_direct = as.matrix(x_direct),
                   N_nodes = NCOL(beta_cooccur),
                   # use 2 latent variables in this simple example
                   N_lv = 2)

Define the parameters to monitor during sampling

model_parameters <- c("alpha", "mu_alpha", 
    "sigma_alpha", "beta_cooccur", "beta_x_dir", 
    "beta_x_ind", "lambda", "mu_cooccur", 
    "mu", "lv_coefs", "error", "ypreds")

Run the model using four parallel Gibbs MCMC chains in runjags; takes ~ 8 minutes currently on a six-core machine with 32GB RAM

library(runjags)
library(parallel)
chains = 4
cl <- parallel::makePSOCKcluster(min(c(chains, 
    parallel::detectCores() - 1)))
setDefaultCluster(cl)
unlink("crf_mod.txt")
cat(model_code, file = "crf_mod.txt", sep = "\n", 
    append = T)
model_run <- run.jags(data = model_data, 
    modules = "glm", n.chains = chains, adapt = 1000, 
    burnin = 1500, sample = 1000, thin = 1, 
    method = "rjparallel", monitor = model_parameters, 
    cl = cl, model = "crf_mod.txt")
stopCluster(cl)
model_run <- coda::as.mcmc.list(model_run)
unlink("crf_mod.txt")
save(data, beta_cooccur, beta_x_ind, model_run, 
    n_nodes, file = "sim_model_run.rda")

Now we use the model for posterior inference. Define some general plotting colours

library(runjags)

## Warning: package 'runjags' was built under R version 4.0.5

load("sim_model_run.rda")
c_light <- c("#DCBCBC")
c_light_trans <- c("#DCBCBC70")
c_light_highlight <- c("#C79999")
c_mid <- c("#B97C7C")
c_mid_highlight <- c("#A25050")
c_mid_highlight_trans <- c("#A2505095")
c_dark <- c("#8F2727")
c_dark_highlight <- c("#7C0000")

Plot some traceplots to examine convergence of chains for key parameters of interest

MCMCvis::MCMCtrace(model_run, c("alpha"), 
    n.eff = T, pdf = F, Rhat = T, main_den = paste0(colnames(data)[1:n_nodes], 
        " intercept"), main_tr = paste0(colnames(data)[1:n_nodes], 
        " intercept"), col_den = c_mid, lwd_den = 2.5, 
    col_txt = "black")

MCMCvis::MCMCtrace(model_run, c("mu_alpha", 
    "sigma_alpha"), n.eff = T, pdf = F, Rhat = T, 
    main_den = c("Pop. intercept mu", "Pop. intercept sigma"), 
    main_tr = c("Pop. intercept mu", "Pop. intercept sigma"), 
    col_den = c_mid, lwd_den = 2.5, col_txt = "black")

Plot hierarchical intercept posteriors

par(mfrow = c(1, 1), mar = c(4, 4.5, 3, 4))
alphas <- MCMCvis::MCMCchains(model_run, 
    "alpha")
probs = c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 
    0.7, 0.8, 0.95)
alpha_creds <- sapply(1:NCOL(alphas), function(n) quantile(alphas[, 
    n], probs = probs))
pop_alphas <- MCMCvis::MCMCchains(model_run, 
    "mu_alpha")
pop_sigmas <- MCMCvis::MCMCchains(model_run, 
    "sigma_alpha")
pop_intercepts <- rnorm(dim(pop_alphas)[1], 
    pop_alphas[, 1], sqrt(pop_sigmas[, 1]))
pop_creds <- quantile(pop_intercepts, probs = probs)
N <- n_nodes + 1
x <- 1:N
idx <- rep(1:N, each = 2)
repped_x <- rep(x, each = 2)
x <- sapply(1:length(idx), function(k) if (k%%2 == 
    0) repped_x[k] + min(diff(x))/2 else repped_x[k] - 
    min(diff(x))/2)

plot(1, type = "n", ylab = expression(Intercept ~ 
    (alpha[species])), xlab = "", xlim = range(x), 
    xaxt = "n", ylim = range(c(as.vector(alpha_creds)), 
        pop_creds))

rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[9, 
    ], pop_creds[9])), ybottom = as.numeric(c(alpha_creds[1, 
    ], pop_creds[1])), col = c_light, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[8, 
    ], pop_creds[8])), ybottom = as.numeric(c(alpha_creds[2, 
    ], pop_creds[2])), col = c_light_highlight, 
    border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[7, 
    ], pop_creds[7])), ybottom = as.numeric(c(alpha_creds[3, 
    ], pop_creds[3])), col = c_mid, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[6, 
    ], pop_creds[6])), ybottom = as.numeric(c(alpha_creds[4, 
    ], pop_creds[4])), col = c_mid_highlight, 
    border = "transparent")
axis(side = 1, at = 1:N, labels = c(colnames(data)[1:n_nodes], 
    "Population"), font.axis = 3)

for (k in 1:(N - 1)) {
    lines(x = c(x[seq(1, N * 2, by = 2)][k], 
        x[seq(2, N * 2, by = 2)][k]), y = c(alpha_creds[5, 
        k], alpha_creds[5, k]), col = c_dark, 
        lwd = 2)
}
lines(x = x[-c(1:((N - 1) * 2))], y = c(pop_creds[5], 
    pop_creds[5]), col = c_dark, lwd = 2)
abline(h = 0, col = "white", lty = 1, lwd = 3.5, 
    xpd = F)
abline(h = 0, col = "black", lty = 1, lwd = 2, 
    xpd = F)

Posteriors for the direct covariate effect (Beta env)

par(mfrow = c(1, 1), mar = c(4, 4.5, 3, 4))
all_cov_dirs <- MCMCvis::MCMCchains(model_run, 
    "beta_x_dir")
cov <- 1
beta_x_dirs <- all_cov_dirs[, grepl(paste0(cov, 
    ","), dimnames(all_cov_dirs)[[2]], perl = T)]
beta_x_dir_creds <- sapply(1:NCOL(beta_x_dirs), 
    function(n) quantile(beta_x_dirs[, n], 
        probs = probs))
N <- n_nodes
x <- 1:N
idx <- rep(1:N, each = 2)
repped_x <- rep(x, each = 2)
x <- sapply(1:length(idx), function(k) if (k%%2 == 
    0) repped_x[k] + min(diff(x))/2 else repped_x[k] - 
    min(diff(x))/2)

plot(1, type = "n", ylab = expression(Env ~ 
    effect ~ (beta[env])), xlab = "", xlim = range(x), 
    xaxt = "n", ylim = range(c(as.vector(beta_x_dir_creds))))

rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[9, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[1, 
    ])), col = c_light, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[8, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[2, 
    ])), col = c_light_highlight, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[7, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[3, 
    ])), col = c_mid, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[6, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[4, 
    ])), col = c_mid_highlight, border = "transparent")
axis(side = 1, at = 1:N, labels = c(colnames(data)[1:n_nodes]), 
    font.axis = 3)
for (k in 1:(N)) {
    lines(x = c(x[seq(1, N * 2, by = 2)][k], 
        x[seq(2, N * 2, by = 2)][k]), y = c(beta_x_dir_creds[5, 
        k], beta_x_dir_creds[5, k]), col = c_dark, 
        lwd = 2)
}
abline(h = 0, col = "white", lty = 1, lwd = 3.5, 
    xpd = F)
abline(h = 0, col = "black", lty = 1, lwd = 2, 
    xpd = F)

Direct co-occurrence effect posterior distributions

plot_marginal <- function(xlab = "", posterior_samples, 
    xlim, title = "", ylab = "", add = FALSE, 
    truth, nbins = 50) {
    posterior_values <- posterior_samples
    bin_lims <- range(posterior_values)
    delta <- diff(range(posterior_values))/nbins
    breaks <- seq(bin_lims[1], bin_lims[2] + 
        delta, delta)
    
    if (missing(xlim)) {
        xlim <- range(c(0, range(posterior_samples)))
    }
    
    if (!add) {
        hist(posterior_values, breaks = breaks, 
            main = title, xlab = xlab, xlim = xlim, 
            ylab = "", yaxt = "n", col = c_mid, 
            border = c_dark_highlight, cex.lab = 1.25, 
            cex.main = 1.25, font.lab = 4, 
            font.main = 4, freq = F)
        
        if (ylab != "") {
            title(ylab = ylab, mgp = c(0.5, 
                0.5, 0), cex.lab = 1.25, 
                font.lab = 4)
        }
        
        if (!missing(truth)) {
            abline(v = truth, col = "white", 
                lty = 1, lwd = 3.5, xpd = F)
            abline(v = truth, col = "black", 
                lty = 1, lwd = 2, xpd = F)
        } else {
            abline(v = 0, col = "white", 
                lty = 1, lwd = 3.5, xpd = F)
            abline(v = 0, col = "black", 
                lty = 1, lwd = 2, xpd = F)
        }
        
        
    } else {
        hist(posterior_values, breaks = breaks, 
            main = title, xlab = xlab, xlim = range(c(0, 
                range(posterior_samples))), 
            ylab = "", yaxt = "n", col = c_light_trans, 
            border = c_mid_highlight_trans, 
            cex.lab = 1.25, cex.main = 1.25, 
            font.lab = 4, font.main = 4, 
            add = T, freq = F)
    }
    
}

beta_cooccurs <- MCMCvis::MCMCchains(model_run, 
    "beta_cooccur")
plot_mat <- matrix(NA, ncol = 4, nrow = 4)
for (i in 1:n_nodes) {
    for (j in 1:n_nodes) {
        plot_mat[i, j] <- paste(i, j)
    }
}
plot_mat[upper.tri(plot_mat)] <- "blank"
diag(plot_mat) <- rep("blank", n_nodes)
plot_mat <- plot_mat[, -n_nodes]

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)
for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = beta_cooccurs[, 
                  grepl(paste0("[", i, ",", 
                    j, "]"), dimnames(beta_cooccurs)[[2]], 
                    fixed = T)])
        }
    }
}
mtext(side = 1, line = -1, text = expression(Co - 
    occurrence ~ association ~ (beta[co - 
    occur])), outer = T)

Remember our original direct effect parameters were a positive association among species 1 and 2, and a negative association among species 1 and 3 (ignore the 1s on the diagonals):

beta_cooccur

##       [,1] [,2] [,3] [,4]
## [1,]  1.00 1.55 -0.8    0
## [2,]  1.55 1.00  0.0    0
## [3,] -0.80 0.00  1.0    0
## [4,]  0.00 0.00  0.0    1

The model recovers these direct co-occurrence effects well. Now plot posterior distributions for the indirect effect of the covariate on co-occurrences. This plot shows how much co-occurrence is expected to vary across sites with low env values (dark red) compared to sites with high env values (light red)

all_cov_inds <- MCMCvis::MCMCchains(model_run, 
    "beta_x_ind")
cov <- 1
beta_x_inds <- all_cov_inds[, grepl(paste0("beta_x_ind\\[", 
    cov), dimnames(all_cov_inds)[[2]], perl = T)]
dimnames(beta_x_inds)[[2]] <- dimnames(beta_cooccurs)[[2]]

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)
for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            
            samples_neg <- ((beta_x_inds[, 
                grepl(paste0("[", i, ",", 
                  j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)] * -1.25) + 
                beta_cooccurs[, grepl(paste0("[", 
                  i, ",", j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)])
            samples_pos <- ((beta_x_inds[, 
                grepl(paste0("[", i, ",", 
                  j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)] * 1.25) + beta_cooccurs[, 
                grepl(paste0("[", i, ",", 
                  j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)])
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = samples_neg, 
                xlim = range(c(0, range(samples_pos), 
                  range(samples_neg))))
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = samples_pos, 
                add = T)
            
        }
    }
}
legend("topright", inset = c(0, -0.3), legend = c("low env", 
    "high env"), bg = "white", col = c(c_mid, 
    c_light), lty = 1, lwd = 2, bty = "n")
mtext(side = 1, line = -1, text = expression(Co - 
    occurrence ~ association ~ (beta[co - 
    occur])), outer = T)

Our original parameters for indirect effects only included a decrease in association for species 1 and 2 as the env variable increased. There is some slight support for this effect but it isn't as strong as we might hope. Clearly binary data can be challenging for inferring underlying patterns, even in simulated datasets

beta_x_ind[1, , ]

##       [,1]  [,2] [,3] [,4]
## [1,]  0.00 -0.65    0    0
## [2,] -0.65  0.00    0    0
## [3,]  0.00  0.00    0    0
## [4,]  0.00  0.00    0    0

Plot latent variable-induced posterior correlations; these capture all co-occurrence effects, including direct and covariate-mediated effects, so can differ somewhat to the co-occurrence plots above. These broadly show that the model is capturing the relatively strong co-occurrence effects that were simulated originally

errors <- MCMCvis::MCMCchains(model_run, 
    "error")

all_errors <- lapply(seq_len(n_nodes), function(i) {
    errors[, grepl(paste0(",", i, "]"), dimnames(errors)[[2]], 
        fixed = T)]
})

corrs <- (lapply(seq_len(dim(errors)[1]), 
    function(i) {
        cov2cor(cov(sapply(all_errors, `[`, 
            i, )))
    }))

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)


for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            
            corr_estimates <- sapply(corrs, 
                "[", i, j)
            
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = corr_estimates, 
                xlim = c(-1, 1))
            
        }
    }
}
mtext(side = 1, line = -1, text = expression(Latent ~ 
    variable ~ correlations), outer = T)

We will next use empirical data from Clark et al 2016, which is a co-occurrence dataset for four avian parasite species infecting New Caledonian Zosterops hosts. For the first example we won't estimate the relative abundance of Zosterops species (proportion of all hosts in the community that belong to the focal host Zosterops species) as we did in the original models, but that is certainly doable for improving the model going forward. Load the data and prepare it for analysis

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.4

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

data <- read.csv("Clark_et_al_Supplement_RawData.csv")
data <- data %>% dplyr::group_by(Capture.session) %>% 
    dplyr::mutate(tot = dplyr::n(), tot_zos = length(which(Genus == 
        "Zosterops")), island = as.factor(Island)) %>% 
    dplyr::mutate(prop_zos = tot_zos/(tot + 
        1)) %>% dplyr::ungroup() %>% dplyr::filter(Genus == 
    "Zosterops") %>% dplyr::select(H.zosteropis:Microfilaria, 
    prop_zos, island)
data$prop_zos <- as.vector(scale(data$prop_zos))

NAs are present, which would cause problems in most JSDM approaches, but these can be handled in the probabilistic generative model

summary(data[, 1:4])

##   H.zosteropis     H.killangoi          Plas        Microfilaria 
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.00  
##  Median :0.0000   Median :0.0000   Median :0.000   Median :0.00  
##  Mean   :0.2787   Mean   :0.1218   Mean   :0.196   Mean   :0.16  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.00  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :1.00  
##  NA's   :22       NA's   :22                       NA's   :174

Convert the island variable to numeric for estimating a hierarchical island-effect term; store original factor levels

orig_island_levels <- levels(data$island)
island <- as.numeric(data$island)
data$island <- NULL

Create the binary outcome matrix to use as observed responses

n_nodes <- 4
n_covariates <- NCOL(data) - n_nodes
ys <- as.matrix(data[, 1:n_nodes])

x_direct <- matrix(NA, nrow = NROW(ys), ncol = n_covariates)
for (i in 1:n_covariates) {
    x_direct[, i] <- as.matrix(data)[, (n_nodes + 
        i)]
}
head(x_direct)

##            [,1]
## [1,]  0.2574622
## [2,] -1.7148197
## [3,] -1.4216010
## [4,] -1.3668238
## [5,] -0.9231291
## [6,]  0.9995482

The model file here is slightly modified to allow for hierarchical island effects

model_code <- "
model
{
  
#### Marginal likelihoods ####
  for (i in 1:N) {
   for (node in 1:N_nodes){
     y_marginal[i, node] ~ dbern(mu_cooccur[i, node])
     # Missing values are predicted using random intercepts, covariates and an error term
     # to capture the missing co-occurrence effects;
     # in essence, these are marginal predictions that can be used to simulate
     # Bernoulli draws for each species in unsampled areas
     logit(mu_cooccur[i, node]) <- alpha[node] + 
                                   beta_island[island[i]] + 
                                   x_direct[i, ] %*% beta_x_dir[, node] +
                                   error[i, node]
   }
  }

# Latent factors capture missing species association terms from the CRF model below
  for (i in 1:N){
   for (j in 1:N_lv){
     LV[i, j] ~ dnorm(0, 1)
   }
  }

# Latent factor loadings: standard normal with identifiability constraints
# Upper triangle of loading matrix set to zero
  for (j in 1:(N_lv - 1)){
   for (j2 in (j + 1):N_lv){
    lv_coefs[j, j2] <- 0
  }
 }
  
# Positive constraints on loading diagonals
  for (j in 1:N_lv) {
   lv_coefs[j, j] ~ dnorm(0, 1)T(0, 1);
 }
  
# Lower diagonal free
  for (j in 2:N_lv){
   for (j2 in 1:(j - 1)){
    lv_coefs[j, j2] ~ dnorm(0, 1)T(-1, 1);
  }
 }
  
# Other elements also free
  for (j in (N_lv + 1):N_nodes) {
   for (j2 in 1:N_lv){
    lv_coefs[j, j2] ~ dnorm(0, 1)T(-1, 1);
  }
 }
  
# 'error' term for each species depends on latent factor associations
  for (i in 1:N){
   for (node in 1:N_nodes){
    error[i, node] <- inprod(lv_coefs[node,], LV[i,])
  }
 }

#### Design matrix construction for the CRF model ####
# Direct effect (co-occurrence) linear predictor contributions
  y_dir_linpred[1:N, 1:N_nodes] <- y_marginal %*% beta_cooccur

# Indirect covariate effects are modelled as interactions between marginal effects
# and covariates; set these up as an array to scale to >1 covariate
  for (i in 1:N){
   for (cov in 1:N_covariates){
    x_indirect[cov, i, 1:N_nodes] <- x_direct[i, cov] * y_marginal[i, ]
   }
 }
  
#### CRF model replaces 'error' above with direct and indirect co-occurrence effects ####
  for (i in 1:N) {
   for (node in 1:N_nodes){
     ys[i, node] ~ dbern(mu[i, node]);
     
     # Linear pedictor is a function of:
     # species-specific hierarchical intercepts;
     # island-specific hierarchical intercepts;
     # fixed additive covariate effects;
     # direct co-occurrence effects;
     # interactions between covariates and co-occurrence effects
     logit(mu[i, node]) <- alpha[node] + 
                           beta_island[island[i]] +
                           x_direct[i, ] %*% beta_x_dir[, node] +
                           y_dir_linpred[i, node] +
                           x_indirect[, i, ] %*% beta_x_ind[, node, ]
  }
 }

#### Posterior simulations of outcomes ####
  for (i in 1:N) {
   for (node in 1:N_nodes){
     ypreds[i, node] ~ dbern(mu_cooccur[i, node]);
   }
  }

#### Priors ####
# Hierarchical intercepts
   for (node in 1:N_nodes){
    # non-centred parameterisation sometimes needed #
    # alpha_raw[node] ~ dnorm(0, 1) #
    # alpha[node] <- mu_alpha + (sigma_alpha * alpha_raw[node]) #
    alpha[node] ~ dnorm(mu_alpha, tau_alpha)
  }
mu_alpha ~ dnorm(-1, 1)
sigma_alpha ~ dnorm(0, 1)T(0, )
tau_alpha <- pow(sigma_alpha, -2)

# Hierarchical island effects
  for (island in 1:N_islands){
   # beta_island_raw[island] ~ dnorm(0, 1) #
   # beta_island[island] <- mu_island + (sigma_island * beta_island_raw[island]) #
   beta_island[island] ~ dnorm(mu_island, tau_island)
  }
mu_island ~ dnorm(-1, 1)
sigma_island ~ dnorm(0, 1)T(0, )
tau_island <- pow(sigma_island, -2)

# Direct covariate effects; L1 regularised
  for (node in 1:N_nodes){
   for (cov in 1:N_covariates){
    beta_x_dir[cov, node] ~ ddexp(0, lambda)
   }
  }

# Direct effects (co-occurrence associations); L1 regularised
   for (node_a in 1:N_nodes) {
    # Diagonal elements are zero
    beta_cooccur[node_a, node_a] <- 0
  
    # Off-diagonal elements are symmetric
     for (node_b in (node_a + 1):N_nodes) {
      beta_cooccur[node_a, node_b] ~ ddexp(0, lambda)
      beta_cooccur[node_b, node_a] <- beta_cooccur[node_a, node_b]
   }
  }

# Indirect covariate effects; L1 regularised
  for(node_a in 1:N_nodes) {
   for (cov in 1:N_covariates){
    beta_x_ind[cov, node_a, node_a] <- 0
    for(node_b in (node_a + 1):N_nodes) {
      beta_x_ind[cov, node_a, node_b] ~ ddexp(0, lambda)
      beta_x_ind[cov, node_b, node_a] <- beta_x_ind[cov, node_a, node_b]
    }
   }
  }

# LASSO parameter is complexity-penalising; pulls toward
# larger penalties unless the data support smaller ones
lambda_raw ~ dbeta(3, 1)
lambda <- lambda_raw * 15
  
}
"

Prepare the data for JAGS

model_data <- list(N = NROW(ys), 
                   ys = as.matrix(ys),
                   y_marginal = as.matrix(ys),
                   # replace above lines with these for sampling from priors
                   # ys = matrix(NA, nrow = nrow(ys), ncol = ncol(ys)),
                   # y_marginal = matrix(NA, nrow = nrow(ys), ncol = ncol(ys)),
                   island = island,
                   N_covariates = n_covariates,
                   x_direct = as.matrix(x_direct),
                   N_nodes = NCOL(ys),
                   N_islands = length(orig_island_levels),
                   # use 2 latent variables in this simple example
                   N_lv = 2)

Define the parameters to monitor during sampling

model_parameters <- c("alpha", "mu_alpha", 
    "sigma_alpha", "beta_island", "mu_island", 
    "sigma_island", "beta_cooccur", "beta_x_dir", 
    "beta_x_ind", "lambda", "mu_cooccur", 
    "mu", "lv_coefs", "error", "ypreds")

Run the model using four parallel Gibbs MCMC chains in runjags; takes ~ 45-60 minutes currently on a six-core machine with 32GB RAM

library(runjags)
library(parallel)
chains = 4
cl <- parallel::makePSOCKcluster(min(c(chains, 
    parallel::detectCores() - 1)))
setDefaultCluster(cl)
unlink("crf_mod.txt")
cat(model_code, file = "crf_mod.txt", sep = "\n", 
    append = T)
model_run <- run.jags(data = model_data, 
    modules = "glm", n.chains = chains, adapt = 1000, 
    burnin = 1000, sample = 4000, thin = 4, 
    method = "rjparallel", monitor = model_parameters, 
    cl = cl, model = "crf_mod.txt")
stopCluster(cl)
model_run <- coda::as.mcmc.list(model_run)
unlink("crf_mod.txt")
save(model_run, data, model_data, orig_island_levels, 
    file = "model_run.rda")

Plot some traceplots to examine convergence of chains for key parameters of interest

load("model_run.rda")
MCMCvis::MCMCtrace(model_run, c("alpha"), 
    n.eff = T, pdf = F, Rhat = T, main_den = paste0(colnames(data)[1:n_nodes], 
        " intercept"), main_tr = paste0(colnames(data)[1:n_nodes], 
        " intercept"), col_den = c_mid, lwd_den = 2.5, 
    col_txt = "black")

MCMCvis::MCMCtrace(model_run, c("mu_alpha", 
    "sigma_alpha"), n.eff = T, pdf = F, Rhat = T, 
    main_den = c("Pop. intercept mu", "Pop. intercept sigma"), 
    main_tr = c("Pop. intercept mu", "Pop. intercept sigma"), 
    col_den = c_mid, lwd_den = 2.5, col_txt = "black")

MCMCvis::MCMCtrace(model_run, c("beta_island"), 
    n.eff = T, pdf = F, Rhat = T, main_den = paste0(orig_island_levels, 
        " intercept"), main_tr = paste0(orig_island_levels, 
        " intercept"), col_den = c_mid, lwd_den = 2.5, 
    col_txt = "black")

MCMCvis::MCMCtrace(model_run, c("mu_island", 
    "sigma_island"), n.eff = T, pdf = F, 
    Rhat = T, main_den = c("Island intercept mean", 
        "Island intercept sd"), main_tr = c("Island intercept mean", 
        "Island intercept sd"), col_den = c_mid, 
    lwd_den = 2.5, col_txt = "black")

MCMCvis::MCMCtrace(model_run, c("lambda"), 
    n.eff = T, pdf = F, Rhat = T, main_den = "L1 penalty lambda", 
    main_tr = "L1 penalty lambda", col_den = c_mid, 
    lwd_den = 2.5, col_txt = "black")

Plot hierarchical intercept posteriors

par(mfrow = c(1, 1), mar = c(4, 4.5, 3, 4))
alphas <- MCMCvis::MCMCchains(model_run, 
    "alpha")
probs = c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 
    0.7, 0.8, 0.95)
alpha_creds <- sapply(1:NCOL(alphas), function(n) quantile(alphas[, 
    n], probs = probs))
pop_alphas <- MCMCvis::MCMCchains(model_run, 
    "mu_alpha")
pop_sigmas <- MCMCvis::MCMCchains(model_run, 
    "sigma_alpha")
pop_intercepts <- rnorm(dim(pop_alphas)[1], 
    pop_alphas[, 1], sqrt(pop_sigmas[, 1]))
pop_creds <- quantile(pop_intercepts, probs = probs)
N <- n_nodes + 1
x <- 1:N
idx <- rep(1:N, each = 2)
repped_x <- rep(x, each = 2)
x <- sapply(1:length(idx), function(k) if (k%%2 == 
    0) repped_x[k] + min(diff(x))/2 else repped_x[k] - 
    min(diff(x))/2)

plot(1, type = "n", ylab = expression(Intercept ~ 
    (alpha[parasite])), xlab = "", xlim = range(x), 
    xaxt = "n", ylim = range(c(as.vector(alpha_creds)), 
        pop_creds))

rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[9, 
    ], pop_creds[9])), ybottom = as.numeric(c(alpha_creds[1, 
    ], pop_creds[1])), col = c_light, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[8, 
    ], pop_creds[8])), ybottom = as.numeric(c(alpha_creds[2, 
    ], pop_creds[2])), col = c_light_highlight, 
    border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[7, 
    ], pop_creds[7])), ybottom = as.numeric(c(alpha_creds[3, 
    ], pop_creds[3])), col = c_mid, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[6, 
    ], pop_creds[6])), ybottom = as.numeric(c(alpha_creds[4, 
    ], pop_creds[4])), col = c_mid_highlight, 
    border = "transparent")
axis(side = 1, at = 1:N, labels = c(colnames(data)[1:n_nodes], 
    "Population"), font.axis = 3)

for (k in 1:(N - 1)) {
    lines(x = c(x[seq(1, N * 2, by = 2)][k], 
        x[seq(2, N * 2, by = 2)][k]), y = c(alpha_creds[5, 
        k], alpha_creds[5, k]), col = c_dark, 
        lwd = 2)
}
lines(x = x[-c(1:((N - 1) * 2))], y = c(pop_creds[5], 
    pop_creds[5]), col = c_dark, lwd = 2)
abline(h = 0, col = "white", lty = 1, lwd = 3.5, 
    xpd = F)
abline(h = 0, col = "black", lty = 1, lwd = 2, 
    xpd = F)

Plot hierarchical island effect posteriors

par(mfrow = c(1, 1), mar = c(4, 4.5, 3, 4))
alphas <- MCMCvis::MCMCchains(model_run, 
    "beta_island")
probs = c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 
    0.7, 0.8, 0.95)
alpha_creds <- sapply(1:NCOL(alphas), function(n) quantile(alphas[, 
    n], probs = probs))
pop_alphas <- MCMCvis::MCMCchains(model_run, 
    "mu_island")
pop_sigmas <- MCMCvis::MCMCchains(model_run, 
    "sigma_island")
pop_intercepts <- rnorm(dim(pop_alphas)[1], 
    pop_alphas[, 1], sqrt(pop_sigmas[, 1]))
pop_creds <- quantile(pop_intercepts, probs = probs)
N <- n_nodes + 1
x <- 1:N
idx <- rep(1:N, each = 2)
repped_x <- rep(x, each = 2)
x <- sapply(1:length(idx), function(k) if (k%%2 == 
    0) repped_x[k] + min(diff(x))/2 else repped_x[k] - 
    min(diff(x))/2)

plot(1, type = "n", ylab = expression(Island ~ 
    effect ~ (beta[island])), xlab = "", 
    xlim = range(x), xaxt = "n", ylim = range(c(as.vector(alpha_creds)), 
        pop_creds))

rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[9, 
    ], pop_creds[9])), ybottom = as.numeric(c(alpha_creds[1, 
    ], pop_creds[1])), col = c_light, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[8, 
    ], pop_creds[8])), ybottom = as.numeric(c(alpha_creds[2, 
    ], pop_creds[2])), col = c_light_highlight, 
    border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[7, 
    ], pop_creds[7])), ybottom = as.numeric(c(alpha_creds[3, 
    ], pop_creds[3])), col = c_mid, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(alpha_creds[6, 
    ], pop_creds[6])), ybottom = as.numeric(c(alpha_creds[4, 
    ], pop_creds[4])), col = c_mid_highlight, 
    border = "transparent")
axis(side = 1, at = 1:N, labels = c(orig_island_levels, 
    "Population"), font.axis = 1)

for (k in 1:(N - 1)) {
    lines(x = c(x[seq(1, N * 2, by = 2)][k], 
        x[seq(2, N * 2, by = 2)][k]), y = c(alpha_creds[5, 
        k], alpha_creds[5, k]), col = c_dark, 
        lwd = 2)
}
lines(x = x[-c(1:((N - 1) * 2))], y = c(pop_creds[5], 
    pop_creds[5]), col = c_dark, lwd = 2)
abline(h = 0, col = "white", lty = 1, lwd = 3.5, 
    xpd = F)
abline(h = 0, col = "black", lty = 1, lwd = 2, 
    xpd = F)

Posteriors for the direct covariate effect (Beta propZos)

par(mfrow = c(1, 1), mar = c(4, 4.5, 3, 4))
all_cov_dirs <- MCMCvis::MCMCchains(model_run, 
    "beta_x_dir")
cov <- 1
beta_x_dirs <- all_cov_dirs[, grepl(paste0(cov, 
    ","), dimnames(all_cov_dirs)[[2]], perl = T)]
beta_x_dir_creds <- sapply(1:NCOL(beta_x_dirs), 
    function(n) quantile(beta_x_dirs[, n], 
        probs = probs))
N <- n_nodes
x <- 1:N
idx <- rep(1:N, each = 2)
repped_x <- rep(x, each = 2)
x <- sapply(1:length(idx), function(k) if (k%%2 == 
    0) repped_x[k] + min(diff(x))/2 else repped_x[k] - 
    min(diff(x))/2)

plot(1, type = "n", ylab = expression(PropZos ~ 
    effect ~ (beta[propZos])), xlab = "", 
    xlim = range(x), xaxt = "n", ylim = range(c(as.vector(beta_x_dir_creds))))

rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[9, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[1, 
    ])), col = c_light, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[8, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[2, 
    ])), col = c_light_highlight, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[7, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[3, 
    ])), col = c_mid, border = "transparent")
rect(xleft = x[seq(1, N * 2, by = 2)], xright = x[seq(2, 
    N * 2, by = 2)], ytop = as.numeric(c(beta_x_dir_creds[6, 
    ])), ybottom = as.numeric(c(beta_x_dir_creds[4, 
    ])), col = c_mid_highlight, border = "transparent")
axis(side = 1, at = 1:N, labels = c(colnames(data)[1:n_nodes]), 
    font.axis = 3)
for (k in 1:(N)) {
    lines(x = c(x[seq(1, N * 2, by = 2)][k], 
        x[seq(2, N * 2, by = 2)][k]), y = c(beta_x_dir_creds[5, 
        k], beta_x_dir_creds[5, k]), col = c_dark, 
        lwd = 2)
}
abline(h = 0, col = "white", lty = 1, lwd = 3.5, 
    xpd = F)
abline(h = 0, col = "black", lty = 1, lwd = 2, 
    xpd = F)

Direct co-occurrence effect posterior distributions

plot_marginal <- function(xlab = "", posterior_samples, 
    xlim, title = "", ylab = "", add = FALSE, 
    truth, nbins = 50) {
    posterior_values <- posterior_samples
    bin_lims <- range(posterior_values)
    delta <- diff(range(posterior_values))/nbins
    breaks <- seq(bin_lims[1], bin_lims[2] + 
        delta, delta)
    
    if (missing(xlim)) {
        xlim <- range(c(0, range(posterior_samples)))
    }
    
    if (!add) {
        hist(posterior_values, breaks = breaks, 
            main = title, xlab = xlab, xlim = xlim, 
            ylab = "", yaxt = "n", col = c_mid, 
            border = c_dark_highlight, cex.lab = 1.25, 
            cex.main = 1.25, font.lab = 4, 
            font.main = 4, freq = F)
        
        if (ylab != "") {
            title(ylab = ylab, mgp = c(0.5, 
                0.5, 0), cex.lab = 1.25, 
                font.lab = 4)
        }
        
        if (!missing(truth)) {
            abline(v = truth, col = "white", 
                lty = 1, lwd = 3.5, xpd = F)
            abline(v = truth, col = "black", 
                lty = 1, lwd = 2, xpd = F)
        } else {
            abline(v = 0, col = "white", 
                lty = 1, lwd = 3.5, xpd = F)
            abline(v = 0, col = "black", 
                lty = 1, lwd = 2, xpd = F)
        }
        
        
    } else {
        hist(posterior_values, breaks = breaks, 
            main = title, xlab = xlab, xlim = range(c(0, 
                range(posterior_samples))), 
            ylab = "", yaxt = "n", col = c_light_trans, 
            border = c_mid_highlight_trans, 
            cex.lab = 1.25, cex.main = 1.25, 
            font.lab = 4, font.main = 4, 
            add = T, freq = F)
    }
    
}

beta_cooccurs <- MCMCvis::MCMCchains(model_run, 
    "beta_cooccur")
plot_mat <- matrix(NA, ncol = 4, nrow = 4)
for (i in 1:n_nodes) {
    for (j in 1:n_nodes) {
        plot_mat[i, j] <- paste(i, j)
    }
}
plot_mat[upper.tri(plot_mat)] <- "blank"
diag(plot_mat) <- rep("blank", n_nodes)
plot_mat <- plot_mat[, -n_nodes]

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)
for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = beta_cooccurs[, 
                  grepl(paste0("[", i, ",", 
                    j, "]"), dimnames(beta_cooccurs)[[2]], 
                    fixed = T)])
        }
    }
}
mtext(side = 1, line = -1, text = expression(Co - 
    occurrence ~ association ~ (beta[co - 
    occur])), outer = T)

Posterior distributions for the indirect effect of the covariate on co-occurrences. This plot shows how much co-occurrence is expected to vary across sites with low proportions of Zosterops hosts (dark red) compared to sites with high proportions of Zosterops (light red)

all_cov_inds <- MCMCvis::MCMCchains(model_run, 
    "beta_x_ind")
cov <- 1
beta_x_inds <- all_cov_inds[, grepl(paste0("beta_x_ind\\[", 
    cov), dimnames(all_cov_inds)[[2]], perl = T)]
dimnames(beta_x_inds)[[2]] <- dimnames(beta_cooccurs)[[2]]

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)
for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            
            samples_neg <- ((beta_x_inds[, 
                grepl(paste0("[", i, ",", 
                  j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)] * -0.75) + 
                beta_cooccurs[, grepl(paste0("[", 
                  i, ",", j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)])
            samples_pos <- ((beta_x_inds[, 
                grepl(paste0("[", i, ",", 
                  j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)] * 0.75) + beta_cooccurs[, 
                grepl(paste0("[", i, ",", 
                  j, "]"), dimnames(beta_cooccurs)[[2]], 
                  fixed = T)])
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = samples_neg, 
                xlim = range(c(0, range(samples_pos), 
                  range(samples_neg))))
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = samples_pos, 
                add = T)
            
        }
    }
}
legend("topright", inset = c(0, -0.3), legend = c("low propZos", 
    "high propZos"), bg = "white", col = c(c_mid, 
    c_light), lty = 1, lwd = 2, bty = "n")
mtext(side = 1, line = -1, text = expression(Co - 
    occurrence ~ association ~ (beta[co - 
    occur])), outer = T)

Plot latent variable-induced posterior correlations; these capture all co-occurrence effects, including direct and covariate-mediated effects, so can differ somewhat to the co-occurrence plots above

errors <- MCMCvis::MCMCchains(model_run, 
    "error")

all_errors <- lapply(seq_len(n_nodes), function(i) {
    errors[, grepl(paste0(",", i, "]"), dimnames(errors)[[2]], 
        fixed = T)]
})

corrs <- (lapply(seq_len(dim(errors)[1]), 
    function(i) {
        cov2cor(cov(sapply(all_errors, `[`, 
            i, )))
    }))

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)


for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            
            corr_estimates <- sapply(corrs, 
                "[", i, j)
            
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = corr_estimates, 
                xlim = c(-1, 1))
            
        }
    }
}
mtext(side = 1, line = -1, text = expression(Latent ~ 
    variable ~ correlations), outer = T)

Posterior predictive checks are always necessary to ensure our model is capable of simulating realistic-looking data that can capture some particular utility functions that are of interest to us as users. First we perform a posterior predictive check of model-simulated prevalence for each parasite and compare to observed prevalence. The model clearly underestimates prevalence of Haemoproteus killangoi, but the remaining simulations are quite realistic

all_preds <- MCMCvis::MCMCchains(model_run, 
    "ypreds")
par(mfrow = c(2, 2), mar = c(4, 2, 2, 1), 
    xpd = T)

for (i in 1:n_nodes) {
    preds <- all_preds[, grepl(paste0(",", 
        i, "]"), dimnames(all_preds)[[2]], 
        fixed = T)]
    sim_prevs <- matrix(NA, nrow = dim(preds)[1], 
        ncol = 1)
    for (x in 1:NROW(sim_prevs)) {
        sim_prevs[x, ] <- sum(preds[x, ])/dim(preds)[2]
    }
    
    plot_marginal(posterior_samples = sim_prevs, 
        truth = (sum(data[, i], na.rm = T)/length(which(!is.na(data[, 
            i])))), ylab = colnames(data)[i], 
        xlim = range(range(sim_prevs) - 0.02, 
            range(sim_prevs) + 0.02), nbins = 20)
    
}
legend("topright", inset = c(0, -0.2), legend = c(expression(mean(hat(y))), 
    expression(mean(y))), bg = "white", col = c(c_mid_highlight, 
    "black"), lty = 1, lwd = 2, bty = "n")
mtext(side = 1, line = -1, text = "Simulated prevalence", 
    outer = T)

As this is a co-occurrence model, we next perform a posterior predictive check for proportion of birds exhibiting coinfections for each pair of parasites. This is a particularly important utility function that we'd like our model to be able to capture adequately. Clearly there is still work to do as some co-infections are estimated to occur more or less frequently than they were observed to in the raw data, but overall the model performs well in the checks that we have thus far implemented (more checking is certainly needed though)

par(mfrow = c(n_nodes - 1, n_nodes - 1), 
    mar = c(4, 2, 2, 1), xpd = T)
all_preds <- MCMCvis::MCMCchains(model_run, 
    "ypreds")

all_y_sims <- lapply(seq_len(n_nodes), function(i) {
    all_preds[, grepl(paste0(",", i, "]"), 
        dimnames(all_preds)[[2]], fixed = T)]
})


for (i in 2:n_nodes) {
    for (j in 1:(n_nodes - 1)) {
        
        if (plot_mat[i, j] == "blank") {
            plot.new()
        } else {
            
            if (j == 1) {
                ylab = colnames(data)[i]
            } else {
                ylab = ""
            }
            
            if ((i - j) == 1) {
                title = colnames(data)[j]
            } else {
                title = ""
            }
            
            coinf_props <- matrix(NA, ncol = 1, 
                nrow = nrow(all_y_sims[[i]]))
            for (x in 1:nrow(coinf_props)) {
                coinf_props[x, ] <- length(which(all_y_sims[[i]][x, 
                  ] == 1 & all_y_sims[[j]][x, 
                  ] == 1))/length(all_y_sims[[i]][x, 
                  ])
            }
            
            plot_marginal(title = title, 
                ylab = ylab, posterior_samples = coinf_props, 
                xlim = range(c(0, range(coinf_props) + 
                  0.02)), truth = length(which(data[, 
                  i] == 1 & data[, j] == 
                  1))/length(which(!is.na(data[, 
                  i]) & !is.na(data[, j]))), 
                nbins = 20)
            
        }
    }
}
legend("topright", inset = c(0, -0.3), legend = c(expression(mean(hat(theta[ij]))), 
    expression(mean(theta[ij]))), bg = "white", 
    col = c(c_mid_highlight, "black"), lty = 1, 
    lwd = 2, bty = "n")
mtext(side = 1, line = -1, text = expression(Simulated ~ 
    coinfection ~ (theta[ij]) ~ prevalence), 
    outer = T)

Bayesian Conditional Random Fields

Nicholas Clark (n.clark@uq.edu.au)