============================================================================================================
Part I: matrix data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1018077 (unreliable estimates)

Part II: ragged data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021087

Part III: matrix data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021097

Part IV: ragged data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1022575

Part V: modeling fixed effects, https://rpubs.com/sherloconan/1070217

1. Getting Started

 

In Loftus and Masson (1994), to-be-recalled 20-word lists were presented at a rate of 1, 2, or 5 seconds per word. Suppose that this hypothetical experiment is run as a balanced one-way between-subjects design with three conditions (ten observations per condition).

dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                13,8,14,23,18,17,1,15,12,9,
                13,8,14,25,20,17,4,17,12,12), ncol=3,
              dimnames=list(NULL, paste0("Level",1:3))) #wide data format
# dat <- dat * 100

df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                 "Response"=c(dat)) #long data format

 

Alternatively, test a simulated two-condition data set.

m1 <- 100; sd <- 20; n <- 24
m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
set.seed(277)
dat <- matrix(c(rnorm(n, m1, sd),
                rnorm(n, m2, sd)), ncol=2,
              dimnames=list(NULL, paste0("Level",1:2)))
# dat <- dat * 100

df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                 "Response"=c(dat))
## The Bayes factors will be computed for the hypothetical data (in the first chunk).

 

Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F))
## Bayes factor analysis
## --------------
## [1] Level : 0.1486628 ±0.01%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS
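
Since the estimated Bayes factor is below 1, the data favor the intercept-only model; the reciprocal expresses the same result as evidence for \(\mathcal{M}_0\):

1/extractBF(JZS_BF)$bf #BF01: roughly 6.7 in favor of the null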

Note: Random effects are assumed. See the discussion here.

 

Markov chain Monte Carlo (MCMC) diagnostics: plots of iterations versus sampled values for each variable in the chain.

set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
## 
## Iterations = 1:1e+05
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1e+05 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean     SD Naive SE Time-series SE
## mu           12.7349  3.866  0.01223        0.01210
## Level-Level1 -1.4307  3.969  0.01255        0.01243
## Level-Level2  0.2094  3.967  0.01255        0.01251
## Level-Level3  1.2056  3.959  0.01252        0.01239
## sig2         35.5905 10.058  0.03181        0.03624
## g_Level       1.1991  6.040  0.01910        0.02693
## 
## 2. Quantiles for each variable:
## 
##                 2.5%     25%     50%     75%  97.5%
## mu            5.4396 10.8778 12.7250 14.6106 20.027
## Level-Level1 -9.0580 -3.3588 -1.3776  0.5549  5.929
## Level-Level2 -7.2830 -1.7555  0.2058  2.1586  7.766
## Level-Level3 -6.1510 -0.7853  1.1638  3.1439  8.869
## sig2         21.0488 28.4998 33.9149 40.7966 59.942
## g_Level       0.1294  0.3015  0.5286  1.0392  5.839
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
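
Trace plots alone can miss slow mixing, so a numeric check is a useful complement. Effective sample sizes can be computed with coda (available alongside BayesFactor):

coda::effectiveSize(draws) #values near the 1e5 iterations indicate near-independent draws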

   

2A. Full JAGS Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the full model \(\mathcal{M}_1\) in (1). Compile the model in JAGS.

Variable     Description
err_prec     precision of the error
trt          indicators of which condition each observation received
nC           number of conditions
nTot         total number of observations
t            treatment effect vector (\(t_i=\sigma_\epsilon d_i\))
mu_overall   grand mean \(\mu\)
lwr          lower bound for the grand mean
upr          upper bound for the grand mean
gt_prec      precision of the standardized treatment effect \(d_i\)
model.full <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall + t[trt[j]], err_prec)
  }

  for (i in 1:nC) {
    t[i] ~ dnorm(0, err_prec * gt_prec)
  }

  gt_prec ~ dgamma(.5, .5 * ht * ht)

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
}"

datalist <- list("nC"=ncol(dat), "nTot"=length(dat), "trt"=gl(ncol(dat),nrow(dat)),
                 "Y"=c(dat), "ht"=1, "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat),
                 "gamma_shape"=0.001, "gamma_rate"=0.001)

   

3A. Log Posterior Function

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Take a parameter vector (e.g., c(err_prec = 0.031, gt_prec = 3.368, mu_overall = 12.925, "t[1]" = -2.314, "t[2]" = -1.496, "t[3]" = 1.769) for \(a=3\)) and the data as input, and return the log of the unnormalized posterior density (i.e., a scalar value).

log.posterior.full <- function(pars, data) {
  mu_overall <- pars["mu_overall"]
  t <- pars[c(4:(3+data$nC))]
  mu <- numeric(data$nTot)
  for (j in 1:(data$nTot)) {
    mu[j] <- mu_overall + t[data$trt[j]]
  }
  gt_prec <- pars["gt_prec"]
  gt_sd <- 1/sqrt(gt_prec)
  err_prec <- pars["err_prec"]
  err_sd <- 1/sqrt(err_prec)

  dunif(mu_overall, data$lwr, data$upr, log=T) +
  sum(dnorm(t, 0, err_sd * gt_sd, log=T)) +
  dgamma(gt_prec, 0.5, rate=0.5 * data$ht * data$ht, log=T) +
  dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
  sum(dnorm(data$Y, mu, err_sd, log=T))
}
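
A quick sanity check, reusing the illustrative parameter values from above together with datalist from Section 2A; the result should be a single finite number:

pars.demo <- c("err_prec"=0.031, "gt_prec"=3.368, "mu_overall"=12.925,
               "t[1]"=-2.314, "t[2]"=-1.496, "t[3]"=1.769)
log.posterior.full(pars.demo, datalist)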

 

We did the debugging! See the discussion here.

The R2jags::jags (v 0.7-1) object sorts parameter names alphabetically, uppercase before lowercase (e.g., B, C, a, b, c).

And bridgesampling::bridge_sampler (v 1.1-2) somehow ignores all the parameters sorted before "deviance". The GitHub version (e.g., v 1.1-5) may have fixed this.

devtools::install_github("quentingronau/bridgesampling@master")

   

4A. Bounds for Parameters

 

Create named vectors with lower and upper bounds (lb and ub) for parameters.

ub.full <- rep(Inf, 3+datalist$nC)
names(ub.full) <- c("err_prec", "gt_prec", "mu_overall", paste0("t[",1:datalist$nC,"]"))
ub.full[3] <- datalist$upr
lb.full <- -ub.full
lb.full[c(1,2)] <- 0
lb.full[3] <- datalist$lwr
  
lb.full #lower bounds
##   err_prec    gt_prec mu_overall       t[1]       t[2]       t[3] 
##    0.00000    0.00000  -22.60308       -Inf       -Inf       -Inf
ub.full #upper bounds
##   err_prec    gt_prec mu_overall       t[1]       t[2]       t[3] 
##        Inf        Inf   48.06975        Inf        Inf        Inf

   

2B. Null JAGS Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the null model \(\mathcal{M}_0\) in (1). Compile the model in JAGS.

Variable     Description
err_prec     precision of the error
nTot         total number of observations
mu_overall   grand mean \(\mu\)
lwr          lower bound for the grand mean
upr          upper bound for the grand mean
model.null <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall, err_prec)
  }

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
}"

   

3B. Log Posterior Function

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Take a parameter vector and the data as input and return the log of the unnormalized posterior density (i.e., a scalar value).

log.posterior.null <- function(pars, data) {
  mu_overall <- pars["mu_overall"]
  err_prec <- pars["err_prec"]
  err_sd <- 1/sqrt(err_prec)

  dunif(mu_overall, data$lwr, data$upr, log=T) +
  dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
  sum(dnorm(data$Y, mu_overall, err_sd, log=T))
}
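
The same kind of sanity check as in Section 3A, now with only two parameters (illustrative values):

pars.demo0 <- c("err_prec"=0.031, "mu_overall"=12.925)
log.posterior.null(pars.demo0, datalist[-c(1,3,5)])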

   

4B. Bounds for Parameters

 

Create named vectors with lower and upper bounds (lb and ub) for parameters.

(ub.null <- c("err_prec"=Inf, "mu_overall"=datalist$upr)) #upper bounds
##   err_prec mu_overall 
##        Inf   48.06975
(lb.null <- c("err_prec"=0, "mu_overall"=datalist$lwr)) #lower bounds
##   err_prec mu_overall 
##    0.00000  -22.60308

   

5. Bayes Factor via Bridge Sampling

 

Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

BFBS <- function(seed=277, diagnostics=F) {
  set.seed(seed)
  jags.full <- jags(model.file=textConnection(model.full),
                    data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","t","gt_prec","err_prec"))
  
  set.seed(seed)
  jags.null <- jags(model.file=textConnection(model.null),
                    data=datalist[-c(1,3,5)], n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","err_prec"))
  
  set.seed(seed)
  bridge.full <- bridge_sampler(samples=jags.full,
                                log_posterior=log.posterior.full,
                                data=datalist,
                                lb=lb.full, ub=ub.full, silent=T)
  
  set.seed(seed)
  bridge.null <- bridge_sampler(samples=jags.null,
                                log_posterior=log.posterior.null,
                                data=datalist[-c(1,3,5)],
                                lb=lb.null, ub=ub.null, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv,
         "jags.M1"=as.mcmc(jags.full), "jags.M0"=as.mcmc(jags.null))
  } else {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv)
  }
}

BFBS()
## $bf
## [1] 0.1461343
## 
## $pererr.M1
## [1] 0.01590054
## 
## $pererr.M0
## [1] 0.003435124
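
The bridge-sampling estimate (0.1461) lands within about 2% of the anovaBF estimate from Section 1 (0.1487). A side-by-side comparison (note that calling BFBS() again re-runs both MCMC fits):

bf.bridge <- BFBS()$bf
bf.jzs <- extractBF(JZS_BF)$bf
c("bridge"=bf.bridge, "JZS"=bf.jzs, "rel.diff"=(bf.bridge - bf.jzs)/bf.jzs)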

 

Check the Monte Carlo error of estimating the Bayes factor.

num <- 50; BF <- PerErr.M1 <- PerErr.M0 <- numeric(num)
for (i in 1:num) {
  result <- BFBS(seed=i)
  BF[i] <- result$bf
  PerErr.M1[i] <- result$pererr.M1
  PerErr.M0[i] <- result$pererr.M0
}

range(PerErr.M1); range(PerErr.M0)
## [1] 0.01336156 0.02714697
## [1] 0.002841254 0.004469564

hist(BF,prob=T,col="white",yaxt="n",ylab="",
     xlab="Bayes factor estimates",main="")
lines(density(BF),col="red",lwd=2)
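
The coefficient of variation of the 50 estimates gives a rough overall Monte Carlo error on the Bayes factor scale:

sd(BF)/mean(BF) #relative spread of the 50 Bayes factor estimates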

   

MCMC diagnostics: plots of iterations versus sampled values for each variable in the chain.

MCMC <- BFBS(diagnostics=T); MCMC.M1 <- MCMC$jags.M1; MCMC.M0 <- MCMC$jags.M0
plot(MCMC.M1, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

plot(MCMC.M0, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

   

All-in-One R Script

 

library(R2jags) #install JAGS first: http://mcmc-jags.sourceforge.net/
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(pwr)
library(BayesFactor)

run.this <- TRUE #if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
  dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                  13,8,14,23,18,17,1,15,12,9,
                  13,8,14,25,20,17,4,17,12,12), ncol=3,
                dimnames=list(NULL, paste0("Level",1:3))) #wide data format
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                   "Response"=c(dat)) #long data format
} else {
  m1 <- 100; sd <- 20; n <- 24
  m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
  set.seed(277)
  dat <- matrix(c(rnorm(n, m1, sd),
                  rnorm(n, m2, sd)), ncol=2,
                dimnames=list(NULL, paste0("Level",1:2)))
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                   "Response"=c(dat))
}

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F)) #Random effects are assumed.
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

{
  model.full <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall + t[trt[j]], err_prec)
  }

  for (i in 1:nC) {
    t[i] ~ dnorm(0, err_prec * gt_prec)
  }

  gt_prec ~ dgamma(.5, .5 * ht * ht)

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
  }"
  
  model.null <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall, err_prec)
  }

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
  }"
} #JAGS models

datalist <- list("nC"=ncol(dat), "nTot"=length(dat), "trt"=gl(ncol(dat),nrow(dat)),
                 "Y"=c(dat), "ht"=1, "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat),
                 "gamma_shape"=0.001, "gamma_rate"=0.001)

{
  log.posterior.full <- function(pars, data) {
    mu_overall <- pars["mu_overall"]
    t <- pars[c(4:(3+data$nC))]
    mu <- numeric(data$nTot)
    for (j in 1:(data$nTot)) {
      mu[j] <- mu_overall + t[data$trt[j]]
    }
    gt_prec <- pars["gt_prec"]
    gt_sd <- 1/sqrt(gt_prec)
    err_prec <- pars["err_prec"]
    err_sd <- 1/sqrt(err_prec)
    
    dunif(mu_overall, data$lwr, data$upr, log=T) +
      sum(dnorm(t, 0, err_sd * gt_sd, log=T)) +
      dgamma(gt_prec, 0.5, rate=0.5 * data$ht * data$ht, log=T) +
      dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
      sum(dnorm(data$Y, mu, err_sd, log=T))
  }
  
  log.posterior.null <- function(pars, data) {
    mu_overall <- pars["mu_overall"]
    err_prec <- pars["err_prec"]
    err_sd <- 1/sqrt(err_prec)
    
    dunif(mu_overall, data$lwr, data$upr, log=T) +
      dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
      sum(dnorm(data$Y, mu_overall, err_sd, log=T))
  }
} #log posterior functions

{
  ub.full <- rep(Inf, 3+datalist$nC)
  names(ub.full) <- c("err_prec", "gt_prec", "mu_overall", paste0("t[",1:datalist$nC,"]"))
  ub.full[3] <- datalist$upr
  lb.full <- -ub.full
  lb.full[c(1,2)] <- 0
  lb.full[3] <- datalist$lwr
  
  ub.null <- c("err_prec"=Inf, "mu_overall"=datalist$upr)
  lb.null <- c("err_prec"=0, "mu_overall"=datalist$lwr)
} #bounds for parameters


BFBS <- function(seed=277, diagnostics=F) {
  set.seed(seed)
  jags.full <- jags(model.file=textConnection(model.full),
                    data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","t","gt_prec","err_prec"))
  
  set.seed(seed)
  jags.null <- jags(model.file=textConnection(model.null),
                    data=datalist[-c(1,3,5)], n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","err_prec"))
  
  set.seed(seed)
  bridge.full <- bridge_sampler(samples=jags.full,
                                log_posterior=log.posterior.full,
                                data=datalist,
                                lb=lb.full, ub=ub.full, silent=T)
  
  set.seed(seed)
  bridge.null <- bridge_sampler(samples=jags.null,
                                log_posterior=log.posterior.null,
                                data=datalist[-c(1,3,5)],
                                lb=lb.null, ub=ub.null, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv,
         "jags.M1"=as.mcmc(jags.full), "jags.M0"=as.mcmc(jags.null))
  } else {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv)
  }
}

BFBS()


# MCMC <- BFBS(diagnostics=T)
# MCMC.M1 <- MCMC$jags.M1
# MCMC.M0 <- MCMC$jags.M0
# plot(MCMC.M1, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)