The Data

The data comes from Figshare, and is from Ribera et al.. We download the data from Figshare. It comes are a zip file with 4 HTML files in it, so we extract each one into a temporary data frame. We supress some warnings: they aren’t important.

library(XML) # To import the data
library(nimble) # for the MCMC
## nimble version 0.12.1 is loaded.
## For more information on NIMBLE and a User Manual,
## please visit https://R-nimble.org.
## 
## Attaching package: 'nimble'
## The following object is masked from 'package:stats':
## 
##     simulate
temp <- tempfile()
download.file("https://figshare.com/ndownloader/files/5591666",temp)

EnvData.tmp <- readHTMLTable(unzip(temp, "appendix-A.htm"))[[1]]
SpData.tmp <- readHTMLTable(unzip(temp, "appendix-B.htm"))[[1]]
TraitData.tmp <- readHTMLTable(unzip(temp, "appendix-C.htm"))[[1]]
unlink(temp)

# FormatEnvData
names(EnvData.tmp) <- gsub(" +\\r\\n +", " ", names(EnvData.tmp))
EnvData.tmp$`Land use` <- gsub(" +\\r\\n +", " ", EnvData.tmp$`Land use`)

ConvData <- function(dat, whNum) {
  dat2 <- sapply(names(dat), function(nm, df, nums) {
    if(nm %in%nums) df[,nm] <- as.numeric(df[,nm])
    df[,nm]
  }, df = dat, nums = whNum, simplify =FALSE)
  as.data.frame(dat2)
}

VarToNum <- c("No.", "Sampling year", "Texture", "Org", "pH", "Avail P", 
              "Avail K", "Moist", "Bare", "Litter", "Bryophyte", "Plants/m2", 
              "Canopy height", "Stem density", "Biom 0-5", "Biom 5+", "Repro biom", 
              "Elevation", "Management")
EnvData <- ConvData(EnvData.tmp, whNum = VarToNum) 
SiteTab <- table(EnvData$Code)
SitesToUse <- names(SiteTab)[SiteTab==1]
EnvData <- EnvData[EnvData$Code%in%SitesToUse,]
rownames(EnvData) <- EnvData$Code

# FormatSpeciesData}
names(SpData.tmp) <- SpData.tmp[1,]
SpData.tmp <- SpData.tmp[-1,]
SpData <- ConvData(SpData.tmp, whNum = names(SpData.tmp)[names(SpData.tmp)!="Code"])
rownames(SpData) <- SpData$Code

# FormatTraitData

TraitData.tmp$CODE <- gsub(" +\\r\\n +", " ", TraitData.tmp$CODE)
# TraitData.tmp$CODE%in%SpData$Code # check that all the species are there
ToNum <- names(TraitData.tmp)[!names(TraitData.tmp)%in%c("CODE", "SPECIES")]
TraitData <- ConvData(TraitData.tmp, whNum = ToNum) 
rownames(TraitData) <- TraitData$CODE

rm(SpData.tmp, TraitData.tmp, EnvData.tmp) # clean up

# Abund}
SpToUse <- TraitData$CODE
EnvNames <- c("Texture", "Org", "pH", "Avail.P", "Avail.K", "Moist", "Bare", 
            "Litter", "Bryophyte", "Plants.m2", "Canopy.height", "Stem.density", 
            "Biom.0.5", "Biom.5.", "Repro.biom", "Elevation")

Sites <- EnvData[SitesToUse, EnvNames]
Abund <- t(SpData[SpToUse,SitesToUse])
Traits <- TraitData[SpToUse, grep("^L", names(TraitData))]

Site <- EnvData[,]
rownames(Site) <- NULL
rownames(Site) <- EnvData$Code

OrdConsts <- list(
  NEnv = ncol(Sites),
  NTraits = ncol(Traits),
  NSites = nrow(Abund),
  NSpecies = ncol(Abund),
  NLatent = 10)

OrdData <- list(
  Sites = data.frame(apply(Sites, 2, scale)), 
  Abund = Abund,
  Traits = data.frame(apply(Traits, 2, scale))
  
)

The Model

This is the model. It is an ordination model, with added extras. It is not identifiable, but that is not a problem here: those issues can be sorted out later.

HeirModel2D <- nimbleCode({
  for (sites in 1:NSites) {
    for (species in 1:NSpecies) {
      Mu[sites, species] <- Mu0 + alSiteSTAR[sites] + alSpeciesSTAR[species] + 
        inprod(L_v_STAR[sites,1:NLatent], lamSpSTAR[species,1:NLatent])
      log(lambda[sites, species]) <- Mu[sites, species]
      Abund[sites, species] ~ dpois(lambda[sites, species])
    }      
    for (l in 1:NLatent) {
      muL[sites, l] <- inprod(Sites[sites, 1:NEnv], betaEnvSTAR[1:NEnv, l])
      L_v_STAR[sites,l] ~ dnorm(muL[sites, l], sd=sdEnv[l])
      Latent[sites, l] <- (L_v_STAR[sites, l] - mean(L_v_STAR[1:NSites, l]))/Sitesd[l]
      ResLat[sites,l] <- (L_v_STAR[sites,l] - muL[sites, l])/Sitesd[l]
    }
    
    alSiteSTAR[sites] ~ dnorm(muSite, sd=sdSites)
    alSite[sites] <- alSiteSTAR[sites] - mean(alSiteSTAR[1:NSites])
  }
  
  for(species in 1:NSpecies) {
    for (l in 1:NLatent) {
      muLam[species, l] <- inprod(Traits[species, 1:NTraits], betaTraitsSTAR[1:NTraits, l])
      lamSpSTAR[species, l] ~ dnorm(muLam[species, l], sd=sdFS[l])
      lamSp[species, l] <- (lamSpSTAR[species, l]-mean(lamSpSTAR[1:NSpecies, l])) * Sitesd[l]
      ResLamSp[species, l] <- (lamSpSTAR[species, l] - muLam[species, l]) * Sitesd[l]
    }
    
    alSpeciesSTAR[species] ~ dnorm(muSpecies, sd=sdSpecies)
    alSpecies[species] <- alSpeciesSTAR[species] - mean(alSpeciesSTAR[1:NSpecies])
  }
  
  for (l in 1:NLatent) {
    for(e in 1:NEnv) {
      betaEnvSTAR[e, l] ~ dnorm(0, 1)
      betaEnv[e, l] <- betaEnvSTAR[e, l]/Sitesd[l]
    }
    for (t in 1:NTraits) {
      betaTraitsSTAR[t, l] ~ dnorm(0,1)
      betaTraits[t, l] <- betaTraitsSTAR[t, l]*Sitesd[l]
    }
    sdFS[l] ~ dexp(1)
    sdEnv[l] ~ dexp(1)
    sdTraits[l] ~ dexp(1)
    Sitesd[l] <- sd(L_v_STAR[1:NSites, l])
    
    MeanLV[l] <- mean(L_v_STAR[1:NSites, l])
    MeanLam[l] <- mean(lamSpSTAR[1:NSpecies, l])
  }

  sdSpecies ~ dexp(1)
  sdSites ~ dexp(1)
  muSpecies ~ dnorm(0, sd=1)
  muSite ~ dnorm(0, sd=1)
  Mu0 ~ dnorm(0, sd=1)
  Mean <- Mu0 + mean(alSiteSTAR[1:NSites]) + mean(alSpeciesSTAR[1:NSpecies]) +
    muSpecies + muSite + sum(MeanLV[1:NLatent]) + sum(MeanLam[1:NLatent])
})

This is the code to fit the model. It is not evaluated here, because it crashed the R session. It works if the code is run in the console, but repeatedly gives the following warning (I suspect at least once per iteration, but I haven’t counted):

Warning: after completing nimArr_2_ManyModelAccess, nimCurrent != nimEnd. Perhaps the NimArr was longer than the accessor?

Warning: in nimArr_2_ManyModelAccess, accessor larger than NimArr!

Warning: after completing ManyModelAccess_2_nimArr, nimCurrent != nimEnd. Perhaps the NimArr was longer than the accessor?

OrdConsts$NLatent <- 2

OrdInits2D <- function(dat, NLat) {
  list(sdSpecies = rgamma(1,10,500), sdSites = rgamma(1,10,500), 
       sdEnv = rgamma(NLat,10,500), sdTraits = rgamma(NLat,10,500), 
       sdFS = rgamma(NLat,10,500),
       muSpecies = rnorm(1, 0, 0.05), muSite = rnorm(1,0,0.05), 
       Mu0 = rnorm(1,log(mean(dat$Abund)),0.05), 
       betaEnvSTAR = matrix(rnorm(nrow(dat$Sites), 0, 0.1), ncol=NLat), 
       betaTraitsSTAR = matrix(rnorm(nrow(dat$Traits), 0, 0.1), ncol=NLat), 
       
       L_v_STAR = matrix(rnorm(NLat*nrow(dat$Abund), 0, 0.05), ncol=NLat),
       alSiteSTAR = rnorm(nrow(dat$Abund), 0, 0.05), 
       lamSpSTAR = matrix(rnorm(NLat*ncol(dat$Abund), 0, 0.05), ncol=NLat), 
       alSpeciesSTAR = rnorm(nrow(dat$Abund), 0, 0.05)
  )
}
ToMonitor <- c('betaEnv','betaTraits', 'Latent', 'lamSp', 'Mean', 'alSite', 
               'alSpecies', 'sdEnv', 'sdSpecies', 'ResLat', 'ResLamSp')


HeirOrd2D <- nimbleModel(HeirModel2D,
                      constants = OrdConsts,
                      data      = OrdData,
                      inits     = OrdInits2D(OrdData, 2))
HeirOrd2D.Comp <- compileNimble(HeirOrd2D)
HeirOrd2D.Conf <- configureMCMC(HeirOrd2D.Comp, monitors=ToMonitor)

HeirOrd2D.Conf$removeSamplers(c('alSpeciesSTAR', 'alSiteSTAR', 'L_v_STAR', 'lamSpSTAR'))

HeirOrd2D.Conf$addSampler(target = 
                            c('alSpeciesSTAR', 'alSiteSTAR', 'L_v_STAR', 'lamSpSTAR'), 
                          type = "RW_block")
# If I block these separately, runMCMC() crashes the R session 
# HierOrd2D.Conf$addSampler(target = 'alSpeciesSTAR', type = "RW_block")
# HierOrd2D.Conf$addSampler(target = 'alSiteSTAR', type = "RW_block")
# HierOrd2D.Conf$addSampler(target = 'L_v_STAR', type = "RW_block")
# HierOrd2D.Conf$addSampler(target = 'lamSpSTAR', type = "RW_block")
HeirOrd2D.MCMC <- buildMCMC(HeirOrd2D.Conf)

HeirOrd2D.CompMCMC <- compileNimble(HeirOrd2D.MCMC)
suppressWarnings(
HeirOrd2D.samp <- runMCMC(HeirOrd2D.CompMCMC, 
                          nchains = 2, niter = 2e2, nburnin = 1e2, thin =1,
                          summary = TRUE, WAIC = FALSE)
  
)