============================================================================================================
Part 1: Using JAGS, https://rpubs.com/sherloconan/1015445 (unreliable estimates)

Part 2: Using Stan, https://rpubs.com/sherloconan/1024130

The paradox on real data, https://rpubs.com/sherloconan/1033617

The paradox on simulated data, https://rpubs.com/sherloconan/1036792

Three ways to standardize effects, https://rpubs.com/sherloconan/1071855

Three ways to standardize effects (continued), https://rpubs.com/sherloconan/1071863

GLS and BIC approximation, https://rpubs.com/sherloconan/1093319

MORE: https://osf.io/5dm28/

1. Getting Started

 

The data set (long format) consists of four columns:

  • RT, the dependent variable: the number of seconds it took to complete a puzzle;

  • ID, a participant identifier;

  • shape and color, two factors that describe the type of puzzle solved.

shape and color each have two levels, and each of 12 participants completed puzzles in every combination of shape and color. The design is thus a \(2\times2\) within-subjects factorial (Morey, 2022). Let shape be factor A (with effects \(\alpha_i\)) and color be factor B (with effects \(\beta_j\)) for \(i,j\in\{1,2\}\).

data(puzzles)
dat <- matrix(puzzles$RT, ncol=4,
              dimnames=list(paste0("s",1:12),
                            c("round&mono","square&mono","round&color","square&color"))) #wide data format
cov(dat) %>% #sample covariance matrix
  kable(digits=3) %>% kable_styling(full_width=F)
             round&mono square&mono round&color square&color
round&mono        4.727       3.273       4.545        4.545
square&mono       3.273       6.000       5.909        4.727
round&color       4.545       5.909       7.636        5.273
square&color      4.545       4.727       5.273        7.455
solve(cov(dat)) %>% #sample precision matrix
  kable(digits=3) %>% kable_styling(full_width=F)
             round&mono square&mono round&color square&color
round&mono        0.755       0.277      -0.439       -0.325
square&mono       0.277       0.860      -0.659       -0.248
round&color      -0.439      -0.659       0.838        0.093
square&color     -0.325      -0.248       0.093        0.424
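
As a quick cross-check on the wide reshaping, the cell means of the long data can be tabulated directly. A minimal sketch in base R; the four means should match the column means of dat (cf. the summary in Section 2A):

aggregate(RT ~ shape + color, data=puzzles, FUN=mean) #one mean per shape-by-color cell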

 

Method 1: Compare, for each effect, the model omitting that effect to the full model in (1).

\[\begin{equation} \tag{1} \mathcal{M}_{\mathrm{full}}:\ Y_{ijk}=\mu+s_k+\alpha_i+\beta_j+(\alpha\beta)_{ij}+\epsilon_{ijk},\quad \epsilon_{ijk}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\ \sigma_\epsilon^2) \end{equation}\]

set.seed(277)
anovaBF(RT ~ shape * color + ID, data=puzzles, whichRandom="ID", whichModels="top", progress=F)
## Bayes factor top-down analysis
## --------------
## When effect is omitted from shape + color + shape:color + ID , BF is...
## [1] Omit color:shape : 2.780721  ±4.21%
## [2] Omit color       : 0.280169  ±11.35%
## [3] Omit shape       : 0.2494565 ±3.3%
## 
## Against denominator:
##   RT ~ shape + color + shape:color + ID 
## ---
## Bayes factor type: BFlinearModel, JZS

 

Method 2: Include the random slopes, as in (2), and test the interaction effect again. There is still anecdotal evidence for the reduced model (i.e., against the AB interaction).

\[\begin{equation} \tag{2} \mathcal{M}_{\mathrm{full}}^{'}:\ Y_{ijk}=\mu+s_k+\alpha_i+\beta_j+(\alpha\beta)_{ij}+(\alpha s)_{ik}+(\beta s)_{jk}+\epsilon_{ijk},\quad \epsilon_{ijk}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\ \sigma_\epsilon^2) \end{equation}\]

set.seed(277)
full <- lmBF(RT~shape+color+ID+shape:color+shape:ID+color:ID, puzzles, whichRandom="ID", progress=F)

set.seed(277)
omit <- lmBF(RT~shape+color+ID+            shape:ID+color:ID, puzzles, whichRandom="ID", progress=F)

omit / full
## Bayes factor analysis
## --------------
## [1] shape + color + ID + shape:ID + color:ID : 2.664999 ±3.24%
## 
## Against denominator:
##   RT ~ shape + color + ID + shape:color + shape:ID + color:ID 
## ---
## Bayes factor type: BFlinearModel, JZS

   

2A. Proposing a New (Full) Model

 

Specify the full model in (3), which has no subject-specific random-effect terms at all; the dependence among a participant's four responses is instead absorbed by the correlated errors. Compile the model in JAGS.

\[\begin{equation} \tag{3} \mathcal{M}_{\mathrm{full}}^{*}:\ Y_{ijk}=\mu+\alpha_i+\beta_j+(\alpha\beta)_{ij}+\epsilon_{ijk}^{*} \end{equation}\]

\[\begin{equation} \pi(\mu)\propto 1 \end{equation}\]

\[\begin{equation} \alpha_i\mid g_A\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_A),\quad\beta_j\mid g_B\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_B),\quad(\alpha\beta)_{ij}\mid g_{AB}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_{AB}) \end{equation}\]

\[\begin{equation} g_A\sim\text{Scale-inv-}\chi^2(1,h_A^2),\quad g_B\sim\text{Scale-inv-}\chi^2(1,h_B^2),\quad g_{AB}\sim\text{Scale-inv-}\chi^2(1,h_{AB}^2) \end{equation}\]

\[\begin{equation} \boldsymbol{\epsilon}_k=(\epsilon_{11k}^{*},\epsilon_{21k}^{*},\epsilon_{12k}^{*},\epsilon_{22k}^{*})^{\top}\overset{\text{i.i.d.}}{\sim}\boldsymbol{\mathcal{N}}_4(\mathbf{0},\mathbf{\Omega}^{-1}) \end{equation}\]

\[\begin{equation} \mathbf{\Omega}\sim\mathcal{W}(\mathbf{I}_{4},5) \end{equation}\]
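
Dropping the subject terms is justified because a random intercept \(s_k\sim\mathcal{N}(0,\sigma_s^2)\) induces a compound-symmetric error covariance (equal variances \(\sigma_s^2+\sigma_\epsilon^2\), equal covariances \(\sigma_s^2\)), which is a special case of the unstructured \(\mathbf{\Omega}^{-1}\) above. A minimal illustration; the variance components 4.5 and 2 are made up to roughly mimic the sample covariance matrix in Section 1:

sigma2_s <- 4.5 #illustrative subject variance
sigma2_e <- 2   #illustrative residual variance
sigma2_s * matrix(1,4,4) + sigma2_e * diag(4) #compound-symmetric covariance implied by a random intercept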

 

Note: the Wishart prior is placed on the precision matrix \(\mathbf{\Omega}\) (the inverse of the covariance matrix) of the error term; equivalently, an inverse-Wishart prior is placed on the covariance matrix \(\mathbf{\Sigma}\) of the error term. ChatGPT (GPT-3.5) may confuse the two (see the picture).
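
A quick numerical check of this correspondence, assuming CholWishart also exports dInvWishart with the same argument order (x, df, scale) as dWishart. For symmetric matrices, the change of variables \(\mathbf{\Sigma}=\mathbf{\Omega}^{-1}\) carries the Jacobian \(|\mathbf{\Sigma}|^{-(d+1)}\):

set.seed(277)
Omega <- stats::rWishart(1, df=5, Sigma=diag(4))[,,1] #one precision-matrix draw from W(I4, 5)
Sigma <- solve(Omega) #implied covariance matrix
#Wishart log density of Omega plus the log Jacobian of the inversion...
CholWishart::dWishart(Omega, 5, diag(4), log=T) - (4+1)*log(det(Sigma))
#...should match the inverse-Wishart log density of Sigma (scale = solve(diag(4)) = I4)
CholWishart::dInvWishart(Sigma, 5, diag(4), log=T)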

model.full <- "model {
  for (k in 1:nS) {
    Y[k, 1:(nA*nB)] ~ dmnorm(c(mu), mat_inv) #mat_inv is the precision matrix; c(mu) flattens column-major: mu[1,1], mu[2,1], mu[1,2], mu[2,2]
  }
  
  for (i in 1:nA) {
    trtA[i] ~ dnorm(0, gA_prec) #g_prec is the precision
  }
  
  for (j in 1:nB) {
    trtB[j] ~ dnorm(0, gB_prec)
  }
  
  for (i in 1:nA) {
    for (j in 1:nB) {
      mu[i,j] <- mu_overall + trtA[i] + trtB[j] + intAB[i,j]
      intAB[i,j] ~ dnorm(0, gAB_prec)
    }
  }

  mu_overall ~ dunif(lwr, upr)
  
  #g_var ~ Scale-inv-chi-squared(df, h^2) <=>
  #g_var ~ Inv-gamma(df/2, scale = df*(h^2)/2) <=>
  #g_prec ~ Gamma(df/2, rate = df*(h^2)/2)
  
  gA_prec ~ dgamma(.5, .5 * hA * hA) #Gamma(shape, rate)
  gB_prec ~ dgamma(.5, .5 * hB * hB)
  gAB_prec ~ dgamma(.5, .5 * hAB * hAB)

  mat_inv ~ dwish(R, nA*nB+1) #R is the scale matrix
}"

summary(dat)
##    round&mono     square&mono     round&color     square&color  
##  Min.   :41.00   Min.   :40.00   Min.   :40.00   Min.   :40.00  
##  1st Qu.:45.75   1st Qu.:44.00   1st Qu.:43.75   1st Qu.:42.25  
##  Median :46.50   Median :45.00   Median :45.00   Median :45.00  
##  Mean   :46.00   Mean   :45.00   Mean   :45.00   Mean   :44.00  
##  3rd Qu.:47.00   3rd Qu.:46.25   3rd Qu.:46.25   3rd Qu.:45.25  
##  Max.   :49.00   Max.   :49.00   Max.   :49.00   Max.   :48.00
datalist <- list("nA"=2, "nB"=2, "nS"=nrow(dat), "R"=diag(4), "Y"=dat,
                 "hA"=1, "hB"=1, "hAB"=1, "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat))

set.seed(277) #Note: `jags.seed` is used for jags.parallel() and does not work for jags()
jags.full <- jags(model.file=textConnection(model.full),
                  data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                  inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                             list("mu_overall"=mean(datalist$Y)*0.99)),
                  parameters.to.save=c("mu_overall","mu","trtA","trtB","intAB",
                                       "gA_prec","gB_prec","gAB_prec","mat_inv"))

   

Note: Random effects are assumed. See the discussion here.

   

3A. Log Posterior Function

 

Take a parameter vector and the data as input and return the log of the unnormalized posterior density (i.e., a scalar value).

log.posterior.full <- function(pars, data) {
  mu_overall <- pars["mu_overall"] #grand mean
  mu <- c(pars["mu[1,1]"], pars["mu[1,2]"], pars["mu[2,1]"], pars["mu[2,2]"]) #condition means
  trtA <- c(pars["trtA[1]"], pars["trtA[2]"]) #treatment effect A
  trtB <- c(pars["trtB[2]"], pars["trtB[2]"]) #treatment effect B
  intAB <- c(pars["intAB[1,1]"], pars["intAB[1,2]"], pars["intAB[2,1]"], pars["intAB[2,2]"]) #interaction effect
  gA_prec <- pars["gA_prec"] #precision of A
  gB_prec <- pars["gB_prec"] #precision of B
  gAB_prec <- pars["gAB_prec"] #precision of the interaction
  gA_sd <- 1/sqrt(gA_prec) #standard deviation of A
  gB_sd <- 1/sqrt(gB_prec) #standard deviation of B
  gAB_sd <- 1/sqrt(gAB_prec) #standard deviation of the interaction
  mat_inv <- matrix(c(pars["mat_inv[1,1]"],pars["mat_inv[2,1]"],pars["mat_inv[3,1]"],pars["mat_inv[4,1]"],
                      pars["mat_inv[1,2]"],pars["mat_inv[2,2]"],pars["mat_inv[3,2]"],pars["mat_inv[4,2]"],
                      pars["mat_inv[1,3]"],pars["mat_inv[2,3]"],pars["mat_inv[3,3]"],pars["mat_inv[4,3]"],
                      pars["mat_inv[1,4]"],pars["mat_inv[2,4]"],pars["mat_inv[3,4]"],pars["mat_inv[4,4]"]),
                    ncol=4) #precision matrix of the error term
  if (min(eigen(mat_inv)$value) < 0 || det(mat_inv) < 5e-6) {
    #impose the symmetry and the positive definiteness (see Section 6)
    Sigma_inv <- as.matrix(Matrix::nearPD(mat_inv, keepDiag=T)$mat)
  } else {
    Sigma_inv <- (mat_inv + t(mat_inv)) / 2
  }
  Sigma <- solve(Sigma_inv) #covariance matrix of the error term
  
  #log of the unnormalized posterior density
  dunif(mu_overall, data$lwr, data$upr, log=T) +
  sum(dnorm(trtA, 0, gA_sd, log=T)) +
  sum(dnorm(trtB, 0, gB_sd, log=T)) +
  sum(dnorm(intAB, 0, gAB_sd, log =T)) +
  dgamma(gA_prec, 0.5, 0.5 * data$hA * data$hA, log=T) +
  dgamma(gB_prec, 0.5, 0.5 * data$hB * data$hB, log=T) +
  dgamma(gAB_prec, 0.5, 0.5 * data$hAB * data$hAB, log=T) +
  CholWishart::dWishart(Sigma_inv, data$nA * data$nB +1, data$R, log=T) +
  sum(mvtnorm::dmvnorm(data$Y, mu, Sigma, log=T))
}
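
As a sanity check, the function can be evaluated at a single posterior draw; the result should be a finite scalar.

draw <- jags.full$BUGSoutput$sims.matrix[1,] #one named parameter vector
log.posterior.full(draw, datalist)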

 

We did the debugging! See the discussion here.

The R2jags::jags (v 0.7-1) object sorts the parameters alphabetically, uppercase before lowercase, e.g., B, C, a, b, c.

And bridgesampling::bridge_sampler (v 1.1-2) somehow ignores all the parameters before “deviance”. The GitHub version (e.g., v 1.1-5) may have fixed it.

devtools::install_github("quentingronau/bridgesampling@master")

   

4A. Bounds for Parameters

 

Create named vectors with lower and upper bounds (lb and ub) for parameters.

pars <- colnames(jags.full$BUGSoutput$sims.matrix)
pars <- pars[pars != "deviance"]
ub.full <- rep(Inf, length(pars))
names(ub.full) <- pars
ub.full["mu_overall"] <- datalist$upr
lb.full <- -ub.full
lb.full["mu_overall"] <- datalist$lwr
lb.full[grepl("_prec",pars)] <- 0 #precisions are nonnegative
lb.full[paste0("mat_inv[",1:(datalist$nA*datalist$nB),",",1:(datalist$nA*datalist$nB),"]")] <- 0 #element-wise pasting gives the diagonal names mat_inv[1,1], ..., mat_inv[4,4], which are nonnegative

lb.full #lower bounds
##     gAB_prec      gA_prec      gB_prec   intAB[1,1]   intAB[2,1]   intAB[1,2] 
##      0.00000      0.00000      0.00000         -Inf         -Inf         -Inf 
##   intAB[2,2] mat_inv[1,1] mat_inv[2,1] mat_inv[3,1] mat_inv[4,1] mat_inv[1,2] 
##         -Inf      0.00000         -Inf         -Inf         -Inf         -Inf 
## mat_inv[2,2] mat_inv[3,2] mat_inv[4,2] mat_inv[1,3] mat_inv[2,3] mat_inv[3,3] 
##      0.00000         -Inf         -Inf         -Inf         -Inf      0.00000 
## mat_inv[4,3] mat_inv[1,4] mat_inv[2,4] mat_inv[3,4] mat_inv[4,4]      mu[1,1] 
##         -Inf         -Inf         -Inf         -Inf      0.00000         -Inf 
##      mu[2,1]      mu[1,2]      mu[2,2]   mu_overall      trtA[1]      trtA[2] 
##         -Inf         -Inf         -Inf     29.64048         -Inf         -Inf 
##      trtB[1]      trtB[2] 
##         -Inf         -Inf
ub.full #upper bounds
##     gAB_prec      gA_prec      gB_prec   intAB[1,1]   intAB[2,1]   intAB[1,2] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
##   intAB[2,2] mat_inv[1,1] mat_inv[2,1] mat_inv[3,1] mat_inv[4,1] mat_inv[1,2] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
## mat_inv[2,2] mat_inv[3,2] mat_inv[4,2] mat_inv[1,3] mat_inv[2,3] mat_inv[3,3] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
## mat_inv[4,3] mat_inv[1,4] mat_inv[2,4] mat_inv[3,4] mat_inv[4,4]      mu[1,1] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
##      mu[2,1]      mu[1,2]      mu[2,2]   mu_overall      trtA[1]      trtA[2] 
##          Inf          Inf          Inf     60.35952          Inf          Inf 
##      trtB[1]      trtB[2] 
##          Inf          Inf

   

5A. Bridge Sampling

 

Compute log marginal likelihood via bridge sampling.

set.seed(277)
bridge.full <- bridge_sampler(samples=jags.full,
                              log_posterior=log.posterior.full,
                              data=datalist,
                              lb=lb.full, ub=ub.full, silent=T)
summary(bridge.full)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -185.5664
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 0.003500541
##  Coefficient of Variation: 0.05916537
##  Percentage Error: 6%
## 
## Note:
## All error measures are approximate.

   

2B. Proposing a New (Omitted) Model

 

Specify the model without the interaction term. Compile the model in JAGS.

model.omit <- "model {
  for (k in 1:nS) {
    Y[k, 1:(nA*nB)] ~ dmnorm(c(mu), mat_inv) 
  }
  
  for (i in 1:nA) {
    trtA[i] ~ dnorm(0, gA_prec)
  }
  
  for (j in 1:nB) {
    trtB[j] ~ dnorm(0, gB_prec)
  }
  
  for (i in 1:nA) {
    for (j in 1:nB) {
      mu[i,j] <- mu_overall + trtA[i] + trtB[j]
    }
  }

  mu_overall ~ dunif(lwr, upr)
  
  gA_prec ~ dgamma(.5, .5 * hA * hA)
  gB_prec ~ dgamma(.5, .5 * hB * hB)

  mat_inv ~ dwish(R, nA*nB+1)
}"

set.seed(277)
jags.omit <- jags(model.file=textConnection(model.omit),
                  data=datalist[-8], n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none", #datalist[-8] drops hAB, which this model does not use
                  inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                             list("mu_overall"=mean(datalist$Y)*0.99)),
                  parameters.to.save=c("mu_overall","mu","trtA","trtB",
                                       "gA_prec","gB_prec","mat_inv"))

   

3B. Log Posterior Function

 

Take a parameter vector and the data as input and return the log of the unnormalized posterior density (i.e., a scalar value).

log.posterior.omit <- function(pars, data) {
  mu_overall <- pars["mu_overall"]
  mu <- c(pars["mu[1,1]"], pars["mu[2,1]"], pars["mu[1,2]"], pars["mu[2,2]"]) #column-major, matching c(mu) in the JAGS model
  trtA <- c(pars["trtA[1]"], pars["trtA[2]"])
  trtB <- c(pars["trtB[1]"], pars["trtB[2]"])
  gA_prec <- pars["gA_prec"]
  gB_prec <- pars["gB_prec"]
  gA_sd <- 1/sqrt(gA_prec)
  gB_sd <- 1/sqrt(gB_prec)
  mat_inv <- matrix(c(pars["mat_inv[1,1]"],pars["mat_inv[2,1]"],pars["mat_inv[3,1]"],pars["mat_inv[4,1]"],
                      pars["mat_inv[1,2]"],pars["mat_inv[2,2]"],pars["mat_inv[3,2]"],pars["mat_inv[4,2]"],
                      pars["mat_inv[1,3]"],pars["mat_inv[2,3]"],pars["mat_inv[3,3]"],pars["mat_inv[4,3]"],
                      pars["mat_inv[1,4]"],pars["mat_inv[2,4]"],pars["mat_inv[3,4]"],pars["mat_inv[4,4]"]),
                    ncol=4)
  if (min(eigen(mat_inv)$value) < 0 || det(mat_inv) < 5e-6) {
    Sigma_inv <- as.matrix(Matrix::nearPD(mat_inv, keepDiag=T)$mat)
  } else {
    Sigma_inv <- (mat_inv + t(mat_inv)) / 2
  }
  Sigma <- solve(Sigma_inv)
  
  dunif(mu_overall, data$lwr, data$upr, log = TRUE) +
  sum(dnorm(trtA, 0, gA_sd, log = TRUE)) +
  sum(dnorm(trtB, 0, gB_sd, log = TRUE)) +
  dgamma(gA_prec, 0.5, 0.5 * data$hA * data$hA, log = TRUE) +
  dgamma(gB_prec, 0.5, 0.5 * data$hB * data$hB, log = TRUE) +
  CholWishart::dWishart(Sigma_inv, data$nA * data$nB +1, data$R, log = TRUE) +
  sum(mvtnorm::dmvnorm(data$Y, mu, Sigma, log = TRUE))
}

   

4B. Bounds for Parameters

 

Create named vectors with lower and upper bounds (lb and ub) for parameters.

pars <- colnames(jags.omit$BUGSoutput$sims.matrix)
pars <- pars[pars != "deviance"]
ub.omit <- rep(Inf, length(pars))
names(ub.omit) <- pars
ub.omit["mu_overall"] <- datalist$upr
lb.omit <- -ub.omit
lb.omit["mu_overall"] <- datalist$lwr
lb.omit[grepl("_prec",pars)] <- 0
lb.omit[paste0("mat_inv[",1:(datalist$nA*datalist$nB),",",1:(datalist$nA*datalist$nB),"]")] <- 0

lb.omit #lower bounds
##      gA_prec      gB_prec mat_inv[1,1] mat_inv[2,1] mat_inv[3,1] mat_inv[4,1] 
##      0.00000      0.00000      0.00000         -Inf         -Inf         -Inf 
## mat_inv[1,2] mat_inv[2,2] mat_inv[3,2] mat_inv[4,2] mat_inv[1,3] mat_inv[2,3] 
##         -Inf      0.00000         -Inf         -Inf         -Inf         -Inf 
## mat_inv[3,3] mat_inv[4,3] mat_inv[1,4] mat_inv[2,4] mat_inv[3,4] mat_inv[4,4] 
##      0.00000         -Inf         -Inf         -Inf         -Inf      0.00000 
##      mu[1,1]      mu[2,1]      mu[1,2]      mu[2,2]   mu_overall      trtA[1] 
##         -Inf         -Inf         -Inf         -Inf     29.64048         -Inf 
##      trtA[2]      trtB[1]      trtB[2] 
##         -Inf         -Inf         -Inf
ub.omit #upper bounds
##      gA_prec      gB_prec mat_inv[1,1] mat_inv[2,1] mat_inv[3,1] mat_inv[4,1] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
## mat_inv[1,2] mat_inv[2,2] mat_inv[3,2] mat_inv[4,2] mat_inv[1,3] mat_inv[2,3] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
## mat_inv[3,3] mat_inv[4,3] mat_inv[1,4] mat_inv[2,4] mat_inv[3,4] mat_inv[4,4] 
##          Inf          Inf          Inf          Inf          Inf          Inf 
##      mu[1,1]      mu[2,1]      mu[1,2]      mu[2,2]   mu_overall      trtA[1] 
##          Inf          Inf          Inf          Inf     60.35952          Inf 
##      trtA[2]      trtB[1]      trtB[2] 
##          Inf          Inf          Inf

   

5B. Bridge Sampling

 

Compute log marginal likelihood via bridge sampling.

set.seed(277)
bridge.omit <- bridge_sampler(samples=jags.omit,
                              log_posterior=log.posterior.omit,
                              data=datalist[-8],
                              lb=lb.omit, ub=ub.omit, silent=T)
summary(bridge.omit)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -183.4822
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 0.002791377
##  Coefficient of Variation: 0.05283348
##  Percentage Error: 5%
## 
## Note:
## All error measures are approximate.

   

Bayes factor (under investigation).

exp(bridge.omit$logml - bridge.full$logml)
## [1] 8.038268
bf(bridge.omit, bridge.full) #SAME
## Estimated Bayes factor in favor of bridge.omit over bridge.full: 8.03827

The estimates are unreliable: re-computing with 50 different random seeds, the Bayes factor ranges from 0.016 to 13.210.

   

Check the Monte Carlo error of estimating the Bayes factor.

num <- 50; BF <- PerErr.Full <- PerErr.Omit <- numeric(num)
system.time(
for (i in 1:num) { #roughly 1h of run time
  result <- BFBS(seed=i) #defined in All-in-One R Script
  BF[i] <- result$bf
  PerErr.Full[i] <- result$pererr.full
  PerErr.Omit[i] <- result$pererr.omit
})


range(PerErr.Full); range(PerErr.Omit)
## [1] 0.05215902 0.06498805
## [1] 0.04683258 0.05501173

hist(BF,prob=T,col="white",yaxt="n",ylab="",
     xlab="Bayes factor (omitted AB) estimates",main="")
lines(density(BF),col="red",lwd=2)

   

6. Issues (solved)

 

Internally, the bridge sampler generates samples from a multivariate normal distribution. When you reconstruct the Sigma matrix from these samples, the result is not symmetric and may not be positive definite either. Either issue triggers an error from CholWishart::dWishart inside the log.posterior.full function (Plummer, 2022).

#one negative eigenvalue
issue <- matrix(c(1.1295254,0.4658979,-0.8727553,-0.2521035,
                  0.4663502,1.1937169,-0.9350007,-0.4105077,
                  -0.8733574,-0.9346202,1.1152784,0.1664068,
                  -0.2525762,-0.4106099,0.1664256,0.4964147), ncol=4, byrow=T)

#eigenvalue close to zero
singular <- matrix(c(0.7573508, 0.6297603, -0.78943891, -0.28205123,
                     0.6305914, 1.2464388, -1.23028009, -0.53885046,
                     -0.7887137, -1.2304128, 1.65066378, 0.08047224,
                     -0.2813410, -0.5389635, 0.08109029, 0.78802955), ncol=4, byrow=T)

#two negative eigenvalues
error <- matrix(c(1.0466132, 1.1008902, -1.3268045, -0.7891338,
                  1.1003529, 0.9572582, -1.2419223, -0.7404087,
                  -1.3267065, -1.2419124, 1.2020524, 0.7253173,
                  -0.7893596, -0.7404482, 0.7249186, 0.7322508), ncol=4, byrow=T)

error_sym <- (error + t(error)) / 2
CholWishart::dWishart(error_sym, 5, diag(4), log=T) #ERROR
## Error in chol.default(x[, , i]): the leading minor of order 2 is not positive
eigen(error_sym)$value
## [1]  4.03620111  0.23869492 -0.09704096 -0.23968046
nearPSD <- function(mat) {
  #' https://www.r-bloggers.com/2013/08/correcting-a-pseudo-correlation-matrix-to-be-positive-semidefinite/
  n <- nrow(mat)
  E <- eigen(mat)
  E$vectors %*% tcrossprod(diag(pmax(E$values, 0), n), E$vectors)
}

error_PSD <- nearPSD(error_sym)
CholWishart::dWishart(error_PSD, 5, diag(4), log=T) #still ERROR
## Error in chol.default(x[, , i]): the leading minor of order 3 is not positive
#transformation only ensures the positive semi-definiteness (not the positive definiteness)
eigen(error_PSD)$value
## [1] 4.036201e+00 2.386949e-01 4.301442e-16 5.874668e-20
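
An alternative to truncating the eigenvalues is to shift the whole spectrum upward, which guarantees strict positive definiteness but, unlike Matrix::nearPD(..., keepDiag=T) below, perturbs the diagonal. A minimal sketch; the target 1e-6 for the smallest eigenvalue is arbitrary:

error_shift <- error_sym + diag(1e-6 - min(eigen(error_sym)$values), 4) #raise the smallest eigenvalue to 1e-6
eigen(error_shift)$values #all strictly positive
CholWishart::dWishart(error_shift, 5, diag(4), log=T) #no longer errors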

 

Use a function like nearPD from the “Matrix” package to replace the reconstructed matrix with a nearby positive-definite one. For example:

issue_PD <- Matrix::nearPD(issue, keepDiag=T)$mat
(issue_PD <- as.matrix(issue_PD))
##            [,1]       [,2]       [,3]       [,4]
## [1,]  1.1295254  0.4673679 -0.8713938 -0.2513799
## [2,]  0.4673679  1.1937169 -0.9327035 -0.4093422
## [3,] -0.8713938 -0.9327035  1.1152784  0.1680422
## [4,] -0.2513799 -0.4093422  0.1680422  0.4964147
solve(issue_PD)
##         [,1]     [,2]     [,3]    [,4]
## [1,] 5607546  7106385  9498016 5484325
## [2,] 7106385  9005851 12036740 6950231
## [3,] 9498016 12036740 16087666 9289307
## [4,] 5484325  6950231  9289307 5363816

It may occasionally (roughly three times per 500 runs) return a warning message: 'nearPD()' did not converge in 100 iterations.

warn <- matrix(c(1.2645163, 0.7925088, -1.1702794, -0.4044887,
                 0.7923883, 1.7609359, -1.2463898, -0.7091416,
                 -1.1705070, -1.2462720, 0.9276745, 0.8707425,
                 -0.4041632, -0.7089192, 0.8708052, 0.2258974), ncol=4, byrow=T)
warn_PD <- Matrix::nearPD(warn, keepDiag=T)$mat
## Warning in Matrix::nearPD(warn, keepDiag = T): 'nearPD()' did not converge in
## 100 iterations
(warn_PD <- as.matrix(warn_PD))
##            [,1]       [,2]       [,3]       [,4]
## [1,]  1.2645163  0.8668866 -0.9670406 -0.4322231
## [2,]  0.8668866  1.7609359 -1.1314389 -0.5982772
## [3,] -0.9670406 -1.1314389  0.9276745  0.4518036
## [4,] -0.4322231 -0.5982772  0.4518036  0.2258974
solve(warn_PD)
##         [,1]    [,2]     [,3]     [,4]
## [1,] 5069556 4484818  9475644  2626031
## [2,] 4484818 4350534  7345314  5412318
## [3,] 9475644 7345314 20520968 -3458742
## [4,] 2626031 5412318 -3458742 26276399

   

7. Diagnostics

 

[7.1] Check the Monte Carlo error of estimating the log marginal likelihood: a histogram of 500 bridge sampling calls.

num <- 500; logML <- numeric(num)
for (i in 1:num) { #roughly five hours of run time
  set.seed(i)
  jags.full.rerun <- jags(model.file=textConnection(model.full),
                          data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                          inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                                     list("mu_overall"=mean(datalist$Y)*0.99)),
                          parameters.to.save=c("mu_overall","mu","trtA","trtB","intAB",
                                               "gA_prec","gB_prec","gAB_prec","mat_inv"))
  logML[i] <- bridge_sampler(samples=jags.full.rerun,
                             log_posterior=log.posterior.full,
                             data=datalist,
                             lb=lb.full, ub=ub.full, silent=T)$logml
}
hist(logML,breaks=15,prob=T,col="white",yaxt="n",ylab="",
     ylim=c(0,0.3),xlim=c(-195,-170),xlab="Log marginal likelihood estimates",main="")
lines(density(logML),col="red",lwd=2)

 

[7.2] Plots of iterations versus sampled values for each variable in the chain.

pos.full <- as.mcmc(jags.full)
plot(pos.full, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

pos.omit <- as.mcmc(jags.omit)
plot(pos.omit, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

   

All-in-One R Script

 

library(R2jags) #install JAGS first: http://mcmc-jags.sourceforge.net/
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(BayesFactor)
require(mvtnorm)
require(CholWishart)

data(puzzles)
dat <- matrix(puzzles$RT, ncol=4,
              dimnames=list(paste0("s",1:12),
                            c("round&mono","square&mono","round&color","square&color"))) #wide data format
summary(dat)
cov(dat) #sample covariance matrix
solve(cov(dat)) #sample precision matrix

set.seed(277)
anovaBF(RT ~ shape * color + ID, data=puzzles, whichRandom="ID", whichModels="top", progress=F)

set.seed(277)
full <- lmBF(RT~shape+color+ID+shape:color+shape:ID+color:ID, puzzles, whichRandom="ID", progress=F)

set.seed(277)
omit <- lmBF(RT~shape+color+ID+            shape:ID+color:ID, puzzles, whichRandom="ID", progress=F)

omit / full


{
  model.full <- "model {
    for (k in 1:nS) {
      Y[k, 1:(nA*nB)] ~ dmnorm(c(mu), mat_inv) #mat_inv is the precision matrix; c(mu) flattens column-major: mu[1,1], mu[2,1], mu[1,2], mu[2,2]
    }
  
    for (i in 1:nA) {
      trtA[i] ~ dnorm(0, gA_prec) #g_prec is the precision
    }
  
    for (j in 1:nB) {
      trtB[j] ~ dnorm(0, gB_prec)
    }
  
    for (i in 1:nA) {
      for (j in 1:nB) {
        mu[i,j] <- mu_overall + trtA[i] + trtB[j] + intAB[i,j]
        intAB[i,j] ~ dnorm(0, gAB_prec)
      }
    }

    mu_overall ~ dunif(lwr, upr)
  
    #g_var ~ Scale-inv-chi-squared(df, h^2) <=>
    #g_var ~ Inv-gamma(df/2, scale = df*(h^2)/2) <=>
    #g_prec ~ Gamma(df/2, rate = df*(h^2)/2)
  
    gA_prec ~ dgamma(.5, .5 * hA * hA) #Gamma(shape, rate)
    gB_prec ~ dgamma(.5, .5 * hB * hB)
    gAB_prec ~ dgamma(.5, .5 * hAB * hAB)

    mat_inv ~ dwish(R, nA*nB+1) #R is the scale matrix
  }"

  model.omit <- "model {
    for (k in 1:nS) {
      Y[k, 1:(nA*nB)] ~ dmnorm(c(mu), mat_inv) 
    }
  
    for (i in 1:nA) {
      trtA[i] ~ dnorm(0, gA_prec)
    }
  
    for (j in 1:nB) {
      trtB[j] ~ dnorm(0, gB_prec)
    }
  
    for (i in 1:nA) {
      for (j in 1:nB) {
        mu[i,j] <- mu_overall + trtA[i] + trtB[j]
      }
    }

    mu_overall ~ dunif(lwr, upr)
  
    gA_prec ~ dgamma(.5, .5 * hA * hA)
    gB_prec ~ dgamma(.5, .5 * hB * hB)

    mat_inv ~ dwish(R, nA*nB+1)
  }"
} #JAGS models

datalist <- list("nA"=2, "nB"=2, "nS"=nrow(dat), "R"=diag(4), "Y"=dat,
                 "hA"=1, "hB"=1, "hAB"=1, "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat))

{
  log.posterior.full <- function(pars, data) {
    mu_overall <- pars["mu_overall"] #grand mean
    mu <- c(pars["mu[1,1]"], pars["mu[2,1]"], pars["mu[1,2]"], pars["mu[2,2]"]) #condition means, in the column-major order of c(mu) in the JAGS model
    trtA <- c(pars["trtA[1]"], pars["trtA[2]"]) #treatment effect A
    trtB <- c(pars["trtB[1]"], pars["trtB[2]"]) #treatment effect B
    intAB <- c(pars["intAB[1,1]"], pars["intAB[1,2]"], pars["intAB[2,1]"], pars["intAB[2,2]"]) #interaction effect
    gA_prec <- pars["gA_prec"] #precision of A
    gB_prec <- pars["gB_prec"] #precision of B
    gAB_prec <- pars["gAB_prec"] #precision of the interaction
    gA_sd <- 1/sqrt(gA_prec) #standard deviation of A
    gB_sd <- 1/sqrt(gB_prec) #standard deviation of B
    gAB_sd <- 1/sqrt(gAB_prec) #standard deviation of the interaction
    mat_inv <- matrix(c(pars["mat_inv[1,1]"],pars["mat_inv[2,1]"],pars["mat_inv[3,1]"],pars["mat_inv[4,1]"],
                        pars["mat_inv[1,2]"],pars["mat_inv[2,2]"],pars["mat_inv[3,2]"],pars["mat_inv[4,2]"],
                        pars["mat_inv[1,3]"],pars["mat_inv[2,3]"],pars["mat_inv[3,3]"],pars["mat_inv[4,3]"],
                        pars["mat_inv[1,4]"],pars["mat_inv[2,4]"],pars["mat_inv[3,4]"],pars["mat_inv[4,4]"]),
                      ncol=4) #precision matrix of the error term
    if (min(eigen(mat_inv)$value) < 0 || det(mat_inv) < 5e-6) {
      #impose the symmetry and the positive definiteness (see Section 6)
      Sigma_inv <- as.matrix(Matrix::nearPD(mat_inv, keepDiag=T)$mat)
    } else {
      Sigma_inv <- (mat_inv + t(mat_inv)) / 2
    }
    Sigma <- solve(Sigma_inv) #covariance matrix of the error term
    
    #log of the unnormalized posterior density
    dunif(mu_overall, data$lwr, data$upr, log=T) +
      sum(dnorm(trtA, 0, gA_sd, log=T)) +
      sum(dnorm(trtB, 0, gB_sd, log=T)) +
      sum(dnorm(intAB, 0, gAB_sd, log =T)) +
      dgamma(gA_prec, 0.5, 0.5 * data$hA * data$hA, log=T) +
      dgamma(gB_prec, 0.5, 0.5 * data$hB * data$hB, log=T) +
      dgamma(gAB_prec, 0.5, 0.5 * data$hAB * data$hAB, log=T) +
      CholWishart::dWishart(Sigma_inv, data$nA * data$nB +1, data$R, log=T) +
      sum(mvtnorm::dmvnorm(data$Y, mu, Sigma, log=T))
  }
  
  log.posterior.omit <- function(pars, data) {
    mu_overall <- pars["mu_overall"]
    mu <- c(pars["mu[1,1]"], pars["mu[2,1]"], pars["mu[1,2]"], pars["mu[2,2]"]) #column-major, matching c(mu) in the JAGS model
    trtA <- c(pars["trtA[1]"], pars["trtA[2]"])
    trtB <- c(pars["trtB[1]"], pars["trtB[2]"])
    gA_prec <- pars["gA_prec"]
    gB_prec <- pars["gB_prec"]
    gA_sd <- 1/sqrt(gA_prec)
    gB_sd <- 1/sqrt(gB_prec)
    mat_inv <- matrix(c(pars["mat_inv[1,1]"],pars["mat_inv[2,1]"],pars["mat_inv[3,1]"],pars["mat_inv[4,1]"],
                        pars["mat_inv[1,2]"],pars["mat_inv[2,2]"],pars["mat_inv[3,2]"],pars["mat_inv[4,2]"],
                        pars["mat_inv[1,3]"],pars["mat_inv[2,3]"],pars["mat_inv[3,3]"],pars["mat_inv[4,3]"],
                        pars["mat_inv[1,4]"],pars["mat_inv[2,4]"],pars["mat_inv[3,4]"],pars["mat_inv[4,4]"]),
                      ncol=4)
    if (min(eigen(mat_inv)$value) < 0 || det(mat_inv) < 5e-6) {
      Sigma_inv <- as.matrix(Matrix::nearPD(mat_inv, keepDiag=T)$mat)
    } else {
      Sigma_inv <- (mat_inv + t(mat_inv)) / 2
    }
    Sigma <- solve(Sigma_inv)
    
    dunif(mu_overall, data$lwr, data$upr, log = TRUE) +
      sum(dnorm(trtA, 0, gA_sd, log = TRUE)) +
      sum(dnorm(trtB, 0, gB_sd, log = TRUE)) +
      dgamma(gA_prec, 0.5, 0.5 * data$hA * data$hA, log = TRUE) +
      dgamma(gB_prec, 0.5, 0.5 * data$hB * data$hB, log = TRUE) +
      CholWishart::dWishart(Sigma_inv, data$nA * data$nB +1, data$R, log = TRUE) +
      sum(mvtnorm::dmvnorm(data$Y, mu, Sigma, log = TRUE))
  }
} #log posterior functions

{
  ub.full <- rep(Inf, 32)
  names(ub.full) <- c("gAB_prec","gA_prec","gB_prec",
                      "intAB[1,1]","intAB[2,1]","intAB[1,2]","intAB[2,2]",
                      "mat_inv[1,1]","mat_inv[2,1]","mat_inv[3,1]","mat_inv[4,1]",
                      "mat_inv[1,2]","mat_inv[2,2]","mat_inv[3,2]","mat_inv[4,2]",
                      "mat_inv[1,3]","mat_inv[2,3]","mat_inv[3,3]","mat_inv[4,3]",
                      "mat_inv[1,4]","mat_inv[2,4]","mat_inv[3,4]","mat_inv[4,4]",
                      "mu[1,1]","mu[2,1]","mu[1,2]","mu[2,2]","mu_overall",
                      "trtA[1]","trtA[2]","trtB[1]","trtB[2]")
  ub.full["mu_overall"] <- datalist$upr
  lb.full <- -ub.full
  lb.full["mu_overall"] <- datalist$lwr
  lb.full[grepl("_prec", names(ub.full))] <- 0
  lb.full[paste0("mat_inv[",1:(datalist$nA*datalist$nB),",",1:(datalist$nA*datalist$nB),"]")] <- 0
  
  ub.omit <- rep(Inf, 27)
  names(ub.omit) <- c("gA_prec","gB_prec",
                      "mat_inv[1,1]","mat_inv[2,1]","mat_inv[3,1]","mat_inv[4,1]",
                      "mat_inv[1,2]","mat_inv[2,2]","mat_inv[3,2]","mat_inv[4,2]",
                      "mat_inv[1,3]","mat_inv[2,3]","mat_inv[3,3]","mat_inv[4,3]",
                      "mat_inv[1,4]","mat_inv[2,4]","mat_inv[3,4]","mat_inv[4,4]",
                      "mu[1,1]","mu[2,1]","mu[1,2]","mu[2,2]","mu_overall",
                      "trtA[1]","trtA[2]","trtB[1]","trtB[2]")
  ub.omit["mu_overall"] <- datalist$upr
  lb.omit <- -ub.omit
  lb.omit["mu_overall"] <- datalist$lwr
  lb.omit[grepl("_prec", names(ub.omit))] <- 0
  lb.omit[paste0("mat_inv[",1:(datalist$nA*datalist$nB),",",1:(datalist$nA*datalist$nB),"]")] <- 0
} #bounds for parameters

BFBS <- function(seed=277, diagnostics=F) {
  set.seed(seed)
  jags.full <- jags(model.file=textConnection(model.full),
                    data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","mu","trtA","trtB","intAB",
                                         "gA_prec","gB_prec","gAB_prec","mat_inv"))
  
  set.seed(seed)
  jags.omit <- jags(model.file=textConnection(model.omit),
                    data=datalist[-8], n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","mu","trtA","trtB",
                                         "gA_prec","gB_prec","mat_inv"))
  
  set.seed(seed)
  bridge.full <- bridge_sampler(samples=jags.full,
                                log_posterior=log.posterior.full,
                                data=datalist,
                                lb=lb.full, ub=ub.full, silent=T)
  
  set.seed(seed)
  bridge.omit <- bridge_sampler(samples=jags.omit,
                                log_posterior=log.posterior.omit,
                                data=datalist[-8],
                                lb=lb.omit, ub=ub.omit, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(bridge.full$logml - bridge.omit$logml), #Bayes factor of the full over the omitted model
         "pererr.full"=error_measures(bridge.full)$cv,
         "pererr.omit"=error_measures(bridge.omit)$cv,
         "jags.full"=as.mcmc(jags.full), "jags.omit"=as.mcmc(jags.omit))
  } else {
    list("bf"=exp(bridge.full$logml - bridge.omit$logml), #Bayes factor of the full over the omitted model
         "pererr.full"=error_measures(bridge.full)$cv,
         "pererr.omit"=error_measures(bridge.omit)$cv)
  }
}

BFBS()


# MCMC <- BFBS(diagnostics=T)
# MCMC.full <- MCMC$jags.full
# MCMC.omit <- MCMC$jags.omit
# plot(MCMC.full, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)