============================================================================================================
Part I: matrix data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1018077 (unreliable estimates)

Part II: ragged data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021087

Part III: matrix data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021097

Part IV: ragged data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1022575

Part V: modeling fixed effects, https://rpubs.com/sherloconan/1070217

1. Getting Started

 

In Loftus and Masson (1994), to-be-recalled 20-word lists were presented at a rate of 1, 2, or 5 seconds per word. Suppose that this hypothetical experiment is run as a balanced one-way between-subjects design with three conditions (ten observations per condition).

dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                13,8,14,23,18,17,1,15,12,9,
                13,8,14,25,20,17,4,17,12,12), ncol=3,
              dimnames=list(NULL, paste0("Level",1:3))) #wide data format
# dat <- dat * 100

df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                 "Response"=c(dat)) #long data format

 

Alternatively, test a simulated two-condition data set.

m1 <- 100; sd <- 20; n <- 24
m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
set.seed(277)
dat <- matrix(c(rnorm(n, m1, sd),
                rnorm(n, m2, sd)), ncol=2,
              dimnames=list(NULL, paste0("Level",1:2)))
# dat <- dat * 100

df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                 "Response"=c(dat))
## The Bayes factors will be computed for the hypothetical data (in the first chunk).

 

Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F))
## Bayes factor analysis
## --------------
## [1] Level : 0.1486628 ±0.01%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS
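
Since the estimated Bayes factor is below 1, the data favor the intercept-only model; the reciprocal expresses the same result as evidence for \(\mathcal{M}_0\):

1/extractBF(JZS_BF)$bf #BF01: roughly 6.7 in favor of the null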

Note: Random effects are assumed. See the discussion here.

 

Markov chain Monte Carlo (MCMC) diagnostics: plots of iterations versus sampled values for each variable in the chain.

set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
## 
## Iterations = 1:1e+05
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1e+05 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean     SD Naive SE Time-series SE
## mu           12.7349  3.866  0.01223        0.01210
## Level-Level1 -1.4307  3.969  0.01255        0.01243
## Level-Level2  0.2094  3.967  0.01255        0.01251
## Level-Level3  1.2056  3.959  0.01252        0.01239
## sig2         35.5905 10.058  0.03181        0.03624
## g_Level       1.1991  6.040  0.01910        0.02693
## 
## 2. Quantiles for each variable:
## 
##                 2.5%     25%     50%     75%  97.5%
## mu            5.4396 10.8778 12.7250 14.6106 20.027
## Level-Level1 -9.0580 -3.3588 -1.3776  0.5549  5.929
## Level-Level2 -7.2830 -1.7555  0.2058  2.1586  7.766
## Level-Level3 -6.1510 -0.7853  1.1638  3.1439  8.869
## sig2         21.0488 28.4998 33.9149 40.7966 59.942
## g_Level       0.1294  0.3015  0.5286  1.0392  5.839
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
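
Trace plots alone can miss slow mixing, so a numeric check is a useful complement. Effective sample sizes can be computed with coda (available alongside BayesFactor):

coda::effectiveSize(draws) #values near the 1e5 iterations indicate near-independent draws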

   

2A. Full JAGS Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the full model \(\mathcal{M}_1\) in (1). Compile the model in JAGS.

Variable     Description
err_prec     precision of the error
trt          indicators of which condition each observation received
nC           number of conditions
nTot         total number of observations
t            treatment effect vector (\(t_i=\sigma_\epsilon d_i\))
mu_overall   grand mean \(\mu\)
lwr          lower bound for the grand mean
upr          upper bound for the grand mean
gt_prec      precision of the standardized treatment effect \(d_i\)
model.full <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall + t[trt[j]], err_prec)
  }

  for (i in 1:nC) {
    t[i] ~ dnorm(0, err_prec * gt_prec)
  }

  gt_prec ~ dgamma(.5, .5 * ht * ht)

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
}"

datalist <- list("nC"=ncol(dat), "nTot"=length(dat), "trt"=gl(ncol(dat),nrow(dat)),
                 "Y"=c(dat), "ht"=1, "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat),
                 "gamma_shape"=0.001, "gamma_rate"=0.001)

   

3A. Log Posterior Function

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Take a parameter vector (e.g., c(err_prec = 0.031, gt_prec = 3.368, mu_overall = 12.925, "t[1]" = -2.314, "t[2]" = -1.496, "t[3]" = 1.769) for \(a=3\)) and the data as input, and return the log of the unnormalized posterior density (i.e., a scalar value).

log.posterior.full <- function(pars, data) {
  mu_overall <- pars["mu_overall"]
  t <- pars[c(4:(3+data$nC))]
  mu <- numeric(data$nTot)
  for (j in 1:(data$nTot)) {
    mu[j] <- mu_overall + t[data$trt[j]]
  }
  gt_prec <- pars["gt_prec"]
  gt_sd <- 1/sqrt(gt_prec)
  err_prec <- pars["err_prec"]
  err_sd <- 1/sqrt(err_prec)

  dunif(mu_overall, data$lwr, data$upr, log=T) +
  sum(dnorm(t, 0, err_sd * gt_sd, log=T)) +
  dgamma(gt_prec, 0.5, rate=0.5 * data$ht * data$ht, log=T) +
  dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
  sum(dnorm(data$Y, mu, err_sd, log=T))
}
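
A quick sanity check, reusing the illustrative parameter values from above together with datalist from Section 2A; the result should be a single finite number:

pars.demo <- c("err_prec"=0.031, "gt_prec"=3.368, "mu_overall"=12.925,
               "t[1]"=-2.314, "t[2]"=-1.496, "t[3]"=1.769)
log.posterior.full(pars.demo, datalist)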

 

We did the debugging! See the discussion here.

The R2jags::jags (v 0.7-1) object sorts parameter names alphabetically, uppercase before lowercase (e.g., B, C, a, b, c).

And bridgesampling::bridge_sampler (v 1.1-2) somehow ignores all the parameters sorted before "deviance". The GitHub version (e.g., v 1.1-5) may have fixed this.

devtools::install_github("quentingronau/bridgesampling@master")

   

4A. Bounds for Parameters

 

Create named vectors with lower and upper bounds (lb and ub) for parameters.

ub.full <- rep(Inf, 3+datalist$nC)
names(ub.full) <- c("err_prec", "gt_prec", "mu_overall", paste0("t[",1:datalist$nC,"]"))
ub.full[3] <- datalist$upr
lb.full <- -ub.full
lb.full[c(1,2)] <- 0
lb.full[3] <- datalist$lwr
  
lb.full #lower bounds
##   err_prec    gt_prec mu_overall       t[1]       t[2]       t[3] 
##    0.00000    0.00000  -22.60308       -Inf       -Inf       -Inf
ub.full #upper bounds
##   err_prec    gt_prec mu_overall       t[1]       t[2]       t[3] 
##        Inf        Inf   48.06975        Inf        Inf        Inf

   

2B. Null JAGS Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the null model \(\mathcal{M}_0\) in (1). Compile the model in JAGS.

Variable     Description
err_prec     precision of the error
nTot         total number of observations
mu_overall   grand mean \(\mu\)
lwr          lower bound for the grand mean
upr          upper bound for the grand mean
model.null <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall, err_prec)
  }

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
}"

   

3B. Log Posterior Function

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^2 \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Take a parameter vector and the data as input and return the log of the unnormalized posterior density (i.e., a scalar value).

log.posterior.null <- function(pars, data) {
  mu_overall <- pars["mu_overall"]
  err_prec <- pars["err_prec"]
  err_sd <- 1/sqrt(err_prec)

  dunif(mu_overall, data$lwr, data$upr, log=T) +
  dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
  sum(dnorm(data$Y, mu_overall, err_sd, log=T))
}
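
The same kind of sanity check as in Section 3A, now with only two parameters (illustrative values):

pars.demo0 <- c("err_prec"=0.031, "mu_overall"=12.925)
log.posterior.null(pars.demo0, datalist[-c(1,3,5)])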

   

4B. Bounds for Parameters

 

Create named vectors with lower and upper bounds (lb and ub) for parameters.

(ub.null <- c("err_prec"=Inf, "mu_overall"=datalist$upr)) #upper bounds
##   err_prec mu_overall 
##        Inf   48.06975
(lb.null <- c("err_prec"=0, "mu_overall"=datalist$lwr)) #lower bounds
##   err_prec mu_overall 
##    0.00000  -22.60308

   

5. Bayes Factor via Bridge Sampling

 

Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

BFBS <- function(seed=277, diagnostics=F) {
  set.seed(seed)
  jags.full <- jags(model.file=textConnection(model.full),
                    data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","t","gt_prec","err_prec"))
  
  set.seed(seed)
  jags.null <- jags(model.file=textConnection(model.null),
                    data=datalist[-c(1,3,5)], n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","err_prec"))
  
  set.seed(seed)
  bridge.full <- bridge_sampler(samples=jags.full,
                                log_posterior=log.posterior.full,
                                data=datalist,
                                lb=lb.full, ub=ub.full, silent=T)
  
  set.seed(seed)
  bridge.null <- bridge_sampler(samples=jags.null,
                                log_posterior=log.posterior.null,
                                data=datalist[-c(1,3,5)],
                                lb=lb.null, ub=ub.null, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv,
         "jags.M1"=as.mcmc(jags.full), "jags.M0"=as.mcmc(jags.null))
  } else {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv)
  }
}

BFBS()
## $bf
## [1] 0.1461343
## 
## $pererr.M1
## [1] 0.01590054
## 
## $pererr.M0
## [1] 0.003435124
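
The bridge-sampling estimate (0.1461) lands within about 2% of the anovaBF estimate from Section 1 (0.1487). A side-by-side comparison (note that calling BFBS() again re-runs both MCMC fits):

bf.bridge <- BFBS()$bf
bf.jzs <- extractBF(JZS_BF)$bf
c("bridge"=bf.bridge, "JZS"=bf.jzs, "rel.diff"=(bf.bridge - bf.jzs)/bf.jzs)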

 

Check the Monte Carlo error of estimating the Bayes factor.

num <- 50; BF <- PerErr.M1 <- PerErr.M0 <- numeric(num)
for (i in 1:num) {
  result <- BFBS(seed=i)
  BF[i] <- result$bf
  PerErr.M1[i] <- result$pererr.M1
  PerErr.M0[i] <- result$pererr.M0
}

range(PerErr.M1); range(PerErr.M0)
## [1] 0.01336156 0.02714697
## [1] 0.002841254 0.004469564

hist(BF,prob=T,col="white",yaxt="n",ylab="",
     xlab="Bayes factor estimates",main="")
lines(density(BF),col="red",lwd=2)
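
The coefficient of variation of the 50 estimates gives a rough overall Monte Carlo error on the Bayes factor scale:

sd(BF)/mean(BF) #relative spread of the 50 Bayes factor estimates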

   

MCMC diagnostics: plots of iterations versus sampled values for each variable in the chain.

MCMC <- BFBS(diagnostics=T); MCMC.M1 <- MCMC$jags.M1; MCMC.M0 <- MCMC$jags.M0
plot(MCMC.M1, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

plot(MCMC.M0, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

   

All-in-One R Script

 

library(R2jags) #install JAGS first: http://mcmc-jags.sourceforge.net/
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(pwr)
library(BayesFactor)

run.this <- TRUE #if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
  dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                  13,8,14,23,18,17,1,15,12,9,
                  13,8,14,25,20,17,4,17,12,12), ncol=3,
                dimnames=list(NULL, paste0("Level",1:3))) #wide data format
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                   "Response"=c(dat)) #long data format
} else {
  m1 <- 100; sd <- 20; n <- 24
  m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
  set.seed(277)
  dat <- matrix(c(rnorm(n, m1, sd),
                  rnorm(n, m2, sd)), ncol=2,
                dimnames=list(NULL, paste0("Level",1:2)))
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                   "Response"=c(dat))
}

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F)) #Random effects are assumed.
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

{
  model.full <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall + t[trt[j]], err_prec)
  }

  for (i in 1:nC) {
    t[i] ~ dnorm(0, err_prec * gt_prec)
  }

  gt_prec ~ dgamma(.5, .5 * ht * ht)

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
  }"
  
  model.null <- "model {
  for (j in 1:nTot) {
    Y[j] ~ dnorm(mu_overall, err_prec)
  }

  mu_overall ~ dunif(lwr, upr)
  err_prec ~ dgamma(gamma_shape, gamma_rate)
  }"
} #JAGS models

datalist <- list("nC"=ncol(dat), "nTot"=length(dat), "trt"=gl(ncol(dat),nrow(dat)),
                 "Y"=c(dat), "ht"=1, "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat),
                 "gamma_shape"=0.001, "gamma_rate"=0.001)

{
  log.posterior.full <- function(pars, data) {
    mu_overall <- pars["mu_overall"]
    t <- pars[c(4:(3+data$nC))]
    mu <- numeric(data$nTot)
    for (j in 1:(data$nTot)) {
      mu[j] <- mu_overall + t[data$trt[j]]
    }
    gt_prec <- pars["gt_prec"]
    gt_sd <- 1/sqrt(gt_prec)
    err_prec <- pars["err_prec"]
    err_sd <- 1/sqrt(err_prec)
    
    dunif(mu_overall, data$lwr, data$upr, log=T) +
      sum(dnorm(t, 0, err_sd * gt_sd, log=T)) +
      dgamma(gt_prec, 0.5, rate=0.5 * data$ht * data$ht, log=T) +
      dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
      sum(dnorm(data$Y, mu, err_sd, log=T))
  }
  
  log.posterior.null <- function(pars, data) {
    mu_overall <- pars["mu_overall"]
    err_prec <- pars["err_prec"]
    err_sd <- 1/sqrt(err_prec)
    
    dunif(mu_overall, data$lwr, data$upr, log=T) +
      dgamma(err_prec, data$gamma_shape, rate=data$gamma_rate, log=T) +
      sum(dnorm(data$Y, mu_overall, err_sd, log=T))
  }
} #log posterior functions

{
  ub.full <- rep(Inf, 3+datalist$nC)
  names(ub.full) <- c("err_prec", "gt_prec", "mu_overall", paste0("t[",1:datalist$nC,"]"))
  ub.full[3] <- datalist$upr
  lb.full <- -ub.full
  lb.full[c(1,2)] <- 0
  lb.full[3] <- datalist$lwr
  
  ub.null <- c("err_prec"=Inf, "mu_overall"=datalist$upr)
  lb.null <- c("err_prec"=0, "mu_overall"=datalist$lwr)
} #bounds for parameters


BFBS <- function(seed=277, diagnostics=F) {
  set.seed(seed)
  jags.full <- jags(model.file=textConnection(model.full),
                    data=datalist, n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","t","gt_prec","err_prec"))
  
  set.seed(seed)
  jags.null <- jags(model.file=textConnection(model.null),
                    data=datalist[-c(1,3,5)], n.chains=2, n.iter=100000, n.burnin=50000, quiet=T, progress.bar="none",
                    inits=list(list("mu_overall"=mean(datalist$Y)*1.01),
                               list("mu_overall"=mean(datalist$Y)*0.99)),
                    parameters.to.save=c("mu_overall","err_prec"))
  
  set.seed(seed)
  bridge.full <- bridge_sampler(samples=jags.full,
                                log_posterior=log.posterior.full,
                                data=datalist,
                                lb=lb.full, ub=ub.full, silent=T)
  
  set.seed(seed)
  bridge.null <- bridge_sampler(samples=jags.null,
                                log_posterior=log.posterior.null,
                                data=datalist[-c(1,3,5)],
                                lb=lb.null, ub=ub.null, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv,
         "jags.M1"=as.mcmc(jags.full), "jags.M0"=as.mcmc(jags.null))
  } else {
    list("bf"=exp(bridge.full$logml - bridge.null$logml),
         "pererr.M1"=error_measures(bridge.full)$cv,
         "pererr.M0"=error_measures(bridge.null)$cv)
  }
}

BFBS()


# MCMC <- BFBS(diagnostics=T)
# MCMC.M1 <- MCMC$jags.M1
# MCMC.M0 <- MCMC$jags.M0
# plot(MCMC.M1, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)