============================================================================================================
Part I: matrix data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1018077 (unreliable estimates)

Part II: ragged data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021087

Part III: matrix data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021097

Part IV: ragged data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1022575

Part V: modeling fixed effects, https://rpubs.com/sherloconan/1070217

1. Getting Started

 

In Loftus and Masson (1994), to-be-recalled 20-word lists were presented at a rate of 1, 2, or 5 sec per word. Suppose that this hypothetical experiment is run as a balanced one-way between-subjects design with three conditions.

dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                13,8,14,23,18,17,1,15,12,9,
                13,8,14,25,20,17,4,17,12,12), ncol=3,
              dimnames=list(NULL, paste0("Level",1:3))) #wide data format
# dat <- dat * 100

df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                 "Response"=c(dat)) #long data format

 

Alternatively, test a simulated data set.

m1 <- 100; sd <- 20; n <- 24 #masking 'sd' with a numeric is safe: calls like sd(dat) below still find stats::sd
m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
set.seed(277)
dat <- matrix(c(rnorm(n, m1, sd),
                rnorm(n, m2, sd)), ncol=2,
              dimnames=list(NULL, paste0("Level",1:2)))
# dat <- dat * 100

df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                 "Response"=c(dat))
## The Bayes factors will be computed for the simulated data (in the second chunk).
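
As a sanity check, the simulated mean difference is constructed so that a two-sample t test with n = 24 per group has 80% power; a minimal verification using the values from the chunk above:

d <- (m1 - m2) / sd              #effect size implied by the simulated means (~0.83)
pwr::pwr.t.test(n=n, d=d)$power  #~0.8 by construction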

 

Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
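
The Bayes factor in favor of \(\mathcal{M}_1\) is the ratio of marginal likelihoods, each obtained by integrating the likelihood over the prior:

\[\begin{equation} \tag{5} \textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)},\qquad p(\text{Data}\mid\mathcal{M}_k)=\int p(\text{Data}\mid\theta_k,\mathcal{M}_k)\ \pi(\theta_k\mid\mathcal{M}_k)\ d\theta_k. \end{equation}\]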

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F))
## Bayes factor analysis
## --------------
## [1] Level : 15.57637 ±0%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS

Note: Random effects are assumed (whichRandom="Level"); omitting this argument would treat Level as a fixed effect. See the discussion here.
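
For contrast, the fixed-effects version (the anovaBF default, covered in Part V) simply omits that argument:

# anovaBF(Response~Level, data=df, progress=F) #Level as a fixed effect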

 

Markov chain Monte Carlo (MCMC) diagnostics: trace plots of iteration number versus sampled value for each variable in the chain.

set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
## 
## Iterations = 1:1e+05
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1e+05 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean     SD Naive SE Time-series SE
## mu            95.263  40.55   0.1282         0.1206
## Level-Level1   9.402  40.56   0.1283         0.1205
## Level-Level2  -9.648  40.55   0.1282         0.1206
## sig2         468.706 101.48   0.3209         0.3415
## g_Level        8.382 396.02   1.2523         1.6672
## 
## 2. Quantiles for each variable:
## 
##                  2.5%      25%     50%     75%  97.5%
## mu            40.0456  84.4686  95.087 105.916 149.69
## Level-Level1 -44.7282  -1.2284   9.296  20.156  65.37
## Level-Level2 -64.3941 -20.1608  -9.278   1.291  45.60
## sig2         309.8889 397.2583 454.970 525.217 705.63
## g_Level        0.1857   0.5092   1.031   2.485  27.46
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
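
Beyond trace plots, effective sample sizes offer another quick diagnostic; since draws behaves as a coda mcmc object, the following sketch applies:

# coda::effectiveSize(draws) #effective sample size for each variable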

   

2A. Full Stan Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the full model \(\mathcal{M}_1\) in (1). Compile and fit the model in Stan.

Note that to compute the (log) marginal likelihood for a Stan model, we need to specify the model in a particular way. Instead of using “~” signs to specify distributions, we directly increment the target with the (log) density functions. The reason is that the “~” notation drops constant terms that are not needed for sampling from the posterior, whereas for computing the marginal likelihood these constants must be retained. For instance, instead of writing y ~ normal(mu, sigma); we need to write target += normal_lpdf(y | mu, sigma); (Gronau, 2021). See also the discussion regarding the implicit uniform prior when computing the log marginal likelihood.
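
To see what is at stake: for a normal likelihood with unknown mu and sigma, the dropped constant is \(-\frac{1}{2}\log(2\pi)\) per observation. A quick R check with illustrative values:

x <- 1.3; mu <- 0; sigma <- 2                #illustrative values
full <- dnorm(x, mu, sigma, log=TRUE)        #what normal_lpdf() keeps
prop <- -0.5*((x - mu)/sigma)^2 - log(sigma) #what 'y ~ normal(mu, sigma)' effectively targets
full - prop                                  #-0.5*log(2*pi), constant in (mu, sigma)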

stancodeM1 <- "
data {
  int<lower=1> N;        // number of subjects in the balanced group
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];     // responses [old array syntax, removed in recent Stan versions]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  real<lower=0> ht;      // scale parameter of the standardized treatment effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real<lower=lwr, upper=upr> mu;     // grand mean
  real<lower=0> sigma;               // standard deviation of the error
  real<lower=0> gt;                  // variance of the standardized treatment effects
  vector[C] t;                       // standardized treatment effects
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += scaled_inv_chi_square_lpdf(gt | 1, ht); // Rouder et al. (2012)
  target += -log(sigma);                            // Jeffreys prior
  target += normal_lpdf(t | 0, sqrt(gt));
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + sigma * t, sigma);
  }
}"

stanmodelM1 <- stan_model(model_code=stancodeM1)
stanfitM1 <- sampling(stanmodelM1, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat, "ht"=1,
                                             "lwr"=mean(dat)-6*sd(dat),
                                             "upr"=mean(dat)+6*sd(dat)),
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
            mean  se_mean       sd       2.5%        25%        50%        75%      97.5%     n_eff   Rhat
mu       95.0844   0.1697  24.0371    43.8462    84.6695    95.2221   105.7901   144.9831  20059.20 1.0002
sigma    21.5113   0.0087   2.2848    17.6000    19.9049    21.3020    22.9081    26.5530  68271.30 1.0000
gt        3.6528   0.1138  30.2947     0.1831     0.5027     1.0101     2.3984    20.9844  70831.87 1.0000
t[1]      0.4502   0.0080   1.1231    -1.8446    -0.0544     0.4301     0.9343     2.8369  19943.46 1.0002
t[2]     -0.4441   0.0080   1.1234    -2.7907    -0.9434    -0.4373     0.0496     1.9089  19925.32 1.0002
lp__   -221.3084   0.0113   1.8739  -225.9730  -222.2747  -220.9189  -219.9243  -218.8357  27739.84 1.0000
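
Note: The All-in-One R Script below uses an equivalent parameterization of \(\mathcal{M}_1\): eta = sigma * sqrt(gt) is declared as a transformed parameter, the unstandardized effects are sampled via target += normal_lpdf(t | 0, eta), and the likelihood becomes normal_lpdf(Y[i] | mu + t, sigma). Since the script's t equals sigma times the standardized t used here, both specifications define the same model and hence the same marginal likelihood.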

   

2B. Null Stan Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the null model \(\mathcal{M}_0\) in (1). Compile and fit the model in Stan.

stancodeM0 <- "
data {
  int<lower=1> N;        // total number of subjects
  vector[N] Y;           // responses
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real<lower=lwr, upper=upr> mu;     // grand mean
  real<lower=0> sigma;               // standard deviation of the error
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);             // Jeffreys prior
  target += normal_lpdf(Y | mu, sigma);
}"

stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=list("N"=length(dat), "Y"=c(dat),
                                             "lwr"=mean(dat)-6*sd(dat),
                                             "upr"=mean(dat)+6*sd(dat)),
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
           mean  se_mean      sd       2.5%        25%        50%        75%      97.5%      n_eff  Rhat
mu      95.1550   0.0096  3.4545    88.3366    92.8554    95.1628    97.4477   101.9427  129408.41     1
sigma   23.7295   0.0073  2.5147    19.4183    21.9589    23.5224    25.2621    29.2509  118611.49     1
lp__  -221.2673   0.0038  1.0285  -224.0292  -221.6629  -220.9494  -220.5351  -220.2686   71402.53     1

   

3. Bayes Factor via Bridge Sampling

 

Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -220.9802
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 1.294006e-05
##  Coefficient of Variation: 0.00359723
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -223.7123
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 8.649591e-08
##  Coefficient of Variation: 0.0002941019
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 15.36527
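
Equivalently, the Bayes factor follows directly from the two stored log marginal likelihoods:

exp(M1$logml - M0$logml)
## [1] 15.36527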

 

Check the Monte Carlo error of estimating the Bayes factor.

num <- 50; BF <- PerErr.M1 <- PerErr.M0 <- numeric(num)
system.time(
for (i in 1:num) { #roughly 42 min of run time
  result <- BFBS(seed=i) #defined in All-in-One R Script
  BF[i] <- result$bf
  PerErr.M1[i] <- result$pererr.M1
  PerErr.M0[i] <- result$pererr.M0
})


range(PerErr.M1); range(PerErr.M0)
## [1] 0.002901795 0.004325503
## [1] 0.0002903806 0.0003130767

hist(BF, prob=T, col="white", yaxt="n", ylab="",
     xlab="Bayes factor estimates", main="")
lines(density(BF), col="red", lwd=2)
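
The location and spread of the 50 estimates can also be summarized numerically, e.g.:

# mean(BF); sd(BF); range(BF) #Monte Carlo variability of the Bayes factor estimates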

   

All-in-One R Script

 

library(rstan)
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(BayesFactor)

run.this=F #if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
  dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                  13,8,14,23,18,17,1,15,12,9,
                  13,8,14,25,20,17,4,17,12,12), ncol=3,
                dimnames=list(NULL, paste0("Level",1:3))) #wide data format
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                   "Response"=c(dat)) #long data format
} else {
  m1 <- 100; sd <- 20; n <- 24
  m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
  set.seed(277)
  dat <- matrix(c(rnorm(n, m1, sd),
                  rnorm(n, m2, sd)), ncol=2,
                dimnames=list(NULL, paste0("Level",1:2)))
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                   "Response"=c(dat))
}

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F)) #random effects, as in Section 1
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

{
  stancodeM1 <- "
  data {
    int<lower=1> N;        // number of subjects in the balanced group
    int<lower=2> C;        // number of conditions
    array[N] vector[C] Y;  // responses (Stan 2.26+ array syntax)
    real<lower=0> ht;      // scale parameter of the standardized treatment effects
    real lwr;              // lower bound for the grand mean
    real upr;              // upper bound for the grand mean
  }

  parameters {
    real<lower=lwr, upper=upr> mu;     // grand mean
    real<lower=0> sigma;               // standard deviation of the error
    real<lower=0> gt;                  // variance of the standardized treatment effects
    vector[C] t;                       // treatment effects
  }

  transformed parameters {
    real<lower=0> eta;     // standard deviation of the treatment effects
    eta = sigma * sqrt(gt);
  }

  model {
    target += uniform_lpdf(mu | lwr, upr);
    target += scaled_inv_chi_square_lpdf(gt | 1, ht); // Rouder et al. (2012)
    target += -log(sigma);                            // Jeffreys prior
    target += normal_lpdf(t | 0, eta);
    for (i in 1:N) {
      target += normal_lpdf(Y[i] | mu + t, sigma);
    }
  }"
  
  stanmodelM1 <- stan_model(model_code=stancodeM1)
  
  stancodeM0 <- "
  data {
    int<lower=1> N;        // total number of subjects
    vector[N] Y;           // responses
    real lwr;              // lower bound for the grand mean
    real upr;              // upper bound for the grand mean
  }

  parameters {
    real<lower=lwr, upper=upr> mu;     // grand mean
    real<lower=0> sigma;               // standard deviation of the error
  }

  model {
    target += uniform_lpdf(mu | lwr, upr);
    target += -log(sigma);             // Jeffreys prior
    target += normal_lpdf(Y | mu, sigma);
  }"
  
  stanmodelM0 <- stan_model(model_code=stancodeM0)
} #Stan models


BFBS <- function(seed=277, diagnostics=F) {
  stanfitM0 <- sampling(stanmodelM0, data=list("N"=length(dat), "Y"=c(dat),
                                               "lwr"=mean(dat)-6*sd(dat),
                                               "upr"=mean(dat)+6*sd(dat)),
                        iter=50000, warmup=10000, chains=4,
                        seed=seed, refresh=0)
  stanfitM1 <- sampling(stanmodelM1, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat, "ht"=1,
                                               "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat)),
                        iter=50000, warmup=10000, chains=4,
                        seed=seed, refresh=0)
  
  set.seed(seed)
  M0 <- bridge_sampler(stanfitM0, silent=T)
  
  set.seed(seed)
  M1 <- bridge_sampler(stanfitM1, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(M1$logml - M0$logml),
         "pererr.M1"=error_measures(M1)$cv,
         "pererr.M0"=error_measures(M0)$cv,
         "stan.M1"=stanfitM1, "stan.M0"=stanfitM0)
  } else {
    list("bf"=exp(M1$logml - M0$logml),
         "pererr.M1"=error_measures(M1)$cv,
         "pererr.M0"=error_measures(M0)$cv)
  }
}

BFBS()


# MCMC <- BFBS(diagnostics=T)
# rstan::summary(MCMC$stan.M1)[[1]]
# rstan::summary(MCMC$stan.M0)[[1]]