Part I: matrix data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1018077 (unreliable estimates)

Part II: ragged data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021087

Part III: matrix data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021097

Part IV: ragged data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1022575

Part V: modeling fixed effects, https://rpubs.com/sherloconan/1070217

1. Getting Started

 

In Loftus and Masson (1994), to-be-recalled 20-word lists were presented at a rate of 1, 2, or 5 seconds per word. Suppose that this hypothetical experiment is run as a balanced one-way between-subjects design with three conditions, one per presentation rate.

dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                13,8,14,23,18,17,1,15,12,9,
                13,8,14,25,20,17,4,17,12,12), ncol=3,
              dimnames=list(NULL, paste0("Level",1:3))) #wide data format

df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                 "Response"=c(dat)) #long data format

 

Alternatively, test a simulated data set.

m1 <- 100; sd <- 20; n <- 24
m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
set.seed(277)
dat <- matrix(c(rnorm(n, m1, sd),
                rnorm(n, m2, sd)), ncol=2,
              dimnames=list(NULL, paste0("Level",1:2)))

df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                 "Response"=c(dat))
## The Bayes factors will be computed for the hypothetical data (in the first chunk).

 

Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
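As a quick sanity check on priors (3) and (4) (an illustration added here, not part of the original analysis): with \(\nu=1\), the scaled inverse-\(\chi^2\) distribution is an inverse-gamma distribution, and marginalizing over \(g\) makes each \(d_i\) Cauchy-distributed with scale \(h\) (Rouder et al., 2012).

set.seed(277)
h <- 1
g <- 1 / rgamma(1e5, shape=0.5, rate=h^2/2) # Scale-inv-chi^2(1, h^2) draws via Inv-Gamma(1/2, h^2/2)
d <- rnorm(1e5, 0, sqrt(g))                 # d_i | g ~ N(0, g)
quantile(d, c(.25, .75))                    # roughly -h and h, the Cauchy(0, h) quartiles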

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F))
## Bayes factor analysis
## --------------
## [1] Level : 0.1486628 ±0.01%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS

Note: Random effects are assumed. See the discussion here.
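For comparison, omitting whichRandom makes anovaBF treat Level as a fixed factor, which changes the prior placed on the effects and hence the Bayes factor (see Part V for fixed-effects modeling):

anovaBF(Response~Level, data=df, progress=F) # Level treated as fixed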

 

Markov chain Monte Carlo (MCMC) diagnostics: trace plots of iteration number against the sampled values for each variable in the chain.

set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
## 
## Iterations = 1:1e+05
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1e+05 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean     SD Naive SE Time-series SE
## mu           12.7349  3.866  0.01223        0.01210
## Level-Level1 -1.4307  3.969  0.01255        0.01243
## Level-Level2  0.2094  3.967  0.01255        0.01251
## Level-Level3  1.2056  3.959  0.01252        0.01239
## sig2         35.5905 10.058  0.03181        0.03624
## g_Level       1.1991  6.040  0.01910        0.02693
## 
## 2. Quantiles for each variable:
## 
##                 2.5%     25%     50%     75%  97.5%
## mu            5.4396 10.8778 12.7250 14.6106 20.027
## Level-Level1 -9.0580 -3.3588 -1.3776  0.5549  5.929
## Level-Level2 -7.2830 -1.7555  0.2058  2.1586  7.766
## Level-Level3 -6.1510 -0.7853  1.1638  3.1439  8.869
## sig2         21.0488 28.4998 33.9149 40.7966 59.942
## g_Level       0.1294  0.3015  0.5286  1.0392  5.839
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
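Beyond the trace plots, the effective sample sizes can be checked directly; the posterior() draws behave as coda mcmc objects (the summary above is coda's), so coda's diagnostics apply:

coda::effectiveSize(draws) # effective sample size per parameter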

   

2A. Full Stan Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the full model \(\mathcal{M}_1\) in (1). Compile and fit the model in Stan.

Note that to compute the (log) marginal likelihood for a Stan model, the model must be specified in a particular way. Instead of using the “~” notation for specifying distributions, we need to call the (log) density functions directly. The reason is that the “~” notation drops constant terms that are not needed for sampling from the posterior; for computing the marginal likelihood, however, these constants must be retained. For instance, instead of writing y ~ normal(mu, sigma), we write target += normal_lpdf(y | mu, sigma) (Gronau, 2021). See also the discussion regarding the implicit uniform prior when computing the log marginal likelihood.
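A small R illustration of the dropped constant (added here for concreteness): for a normal density, the “~” notation omits the additive term \(-\log\sqrt{2\pi}\) per observation, which does not depend on the parameters and therefore cancels in posterior sampling but not in the marginal likelihood.

y <- 1.2; mu <- 0; sigma <- 2
full   <- dnorm(y, mu, sigma, log=TRUE)            # what normal_lpdf(y | mu, sigma) returns
kernel <- -log(sigma) - 0.5 * ((y - mu) / sigma)^2 # the parameter-dependent part kept by "~"
full - kernel                                      # -0.5 * log(2 * pi), constant in (mu, sigma)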

stancodeM1 <- "
data {
  int<lower=1> N;        // total number of subjects
  int<lower=2> C;        // number of conditions
  vector[N] Y;           // responses (ready for the ragged data structure)
  // int s[C];              // condition sizes (i.e., number of subjects in each condition) [removed features]
  array[C] int s;        // condition sizes [Stan 2.26+ syntax for array declarations]
  real<lower=0> ht;      // scale parameter of the standardized treatment effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real<lower=lwr, upper=upr> mu;     // grand mean
  real<lower=0> sigma;               // standard deviation of the error
  real<lower=0> gt;                  // variance of the standardized treatment effects
  vector[C] t;                       // treatment effects
}

transformed parameters {
  real<lower=0> eta;     // standard deviation of the treatment effects
  eta = sigma * sqrt(gt);
}

model {
  int pos;
  pos = 1;

  for (i in 1:C) {
    target += normal_lpdf(segment(Y, pos, s[i]) | mu + t[i], sigma);
    pos = pos + s[i];
  }
  target += normal_lpdf(t | 0, eta);

  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);                            // Jeffreys prior
  target += scaled_inv_chi_square_lpdf(gt | 1, ht); // Rouder et al. (2012)
}"

stanmodelM1 <- stan_model(model_code=stancodeM1)
stanfitM1 <- sampling(stanmodelM1, 
                      data=list("N"=length(dat), "C"=ncol(dat), "Y"=c(dat), 
                                "s"=rep(nrow(dat),ncol(dat)), "ht"=1,
                                "lwr"=mean(dat)-6*sd(dat),
                                "upr"=mean(dat)+6*sd(dat)),
                      iter=50000, warmup=10000, chains=4,
                      seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
           mean       se_mean  sd      2.5%       25%        50%        75%        97.5%     n_eff     Rhat
mu          12.7307   0.0262   3.8386     5.1333   10.8575    12.7372    14.6000    20.2193  21478.83  1.0001
sigma        5.9198   0.0030   0.8096     4.5853    5.3457     5.8353     6.3967     7.7461  72425.07  1.0000
gt           1.2299   0.0279   6.3457     0.1287    0.3015     0.5330     1.0628     6.0568  51827.68  1.0001
t[1]        -1.4243   0.0259   3.9463    -9.2948   -3.3727    -1.3558     0.5861     6.2154  23144.07  1.0000
t[2]         0.2191   0.0259   3.9392    -7.5038   -1.7452     0.1999     2.1749     8.0292  23066.49  1.0001
t[3]         1.2101   0.0259   3.9359    -6.3462   -0.7728     1.1562     3.1501     9.1211  23040.06  1.0001
eta          5.3180   0.0243   3.9255     2.0136    3.1925     4.3027     6.1234    14.7301  26040.94  1.0001
lp__      -107.7403   0.0121   2.1067  -112.8762 -108.8584  -107.3352  -106.1923  -104.8579  30287.68  1.0001

   

2B. Null Stan Model

 

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n_i. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

 

Specify the null model \(\mathcal{M}_0\) in (1). Compile and fit the model in Stan.

stancodeM0 <- "
data {
  int<lower=1> N;        // total number of subjects
  vector[N] Y;           // responses
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real<lower=lwr, upper=upr> mu;     // grand mean
  real<lower=0> sigma;               // standard deviation of the error
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);                            // Jeffreys prior
  target += normal_lpdf(Y | mu, sigma);
}"

stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=list("N"=length(dat), "Y"=c(dat),
                                             "lwr"=mean(dat)-6*sd(dat),
                                             "upr"=mean(dat)+6*sd(dat)),
                      iter=50000, warmup=10000, chains=4,
                      seed=277, refresh=0)

rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
          mean      se_mean  sd      2.5%       25%       50%       75%       97.5%     n_eff      Rhat
mu        12.7340   0.0033   1.1171   10.5249   12.0002   12.7352   13.4642   14.9440  111691.69  1.0000
sigma      6.0486   0.0025   0.8283    4.6920    5.4626    5.9577    6.5341    7.9256  106930.74  1.0000
lp__     -97.6764   0.0041   1.0394 -100.4816  -98.0745  -97.3517  -96.9380  -96.6667   64331.51  1.0001

   

3. Bayes Factor via Bridge Sampling

 

Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -101.5422
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 3.843066e-06
##  Coefficient of Variation: 0.001960374
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -99.63538
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 1.547895e-07
##  Coefficient of Variation: 0.000393433
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 0.14855
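Equivalently, the Bayes factor can be recovered by hand from the stored log marginal likelihoods (as the all-in-one script below does):

exp(M1$logml - M0$logml) # same estimate as bf(M1, M0)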

   

All-in-One R Script

 

library(rstan)
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(BayesFactor)

run.this <- TRUE # if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
  dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                  13,8,14,23,18,17,1,15,12,9,
                  13,8,14,25,20,17,4,17,12,12), ncol=3,
                dimnames=list(NULL, paste0("Level",1:3))) #wide data format
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
                   "Response"=c(dat)) #long data format
} else {
  m1 <- 100; sd <- 20; n <- 24
  m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
  set.seed(277)
  dat <- matrix(c(rnorm(n, m1, sd),
                  rnorm(n, m2, sd)), ncol=2,
                dimnames=list(NULL, paste0("Level",1:2)))
  
  df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
                   "Response"=c(dat))
}

(JZS_BF <- anovaBF(Response~Level, data=df, whichRandom="Level", progress=F))
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

{
  stancodeM1 <- "
  data {
    int<lower=1> N;        // total number of subjects
    int<lower=2> C;        // number of conditions
    vector[N] Y;           // responses (ready for the ragged data structure)
    // int s[C];              // condition sizes (i.e., number of subjects in each condition) [removed features]
    array[C] int s;        // condition sizes [Stan 2.26+ syntax for array declarations]
    real<lower=0> ht;      // scale parameter of the standardized treatment effects
    real lwr;              // lower bound for the grand mean
    real upr;              // upper bound for the grand mean
  }

  parameters {
    real<lower=lwr, upper=upr> mu;     // grand mean
    real<lower=0> sigma;               // standard deviation of the error
    real<lower=0> gt;                  // variance of the standardized treatment effects
    vector[C] t;                       // treatment effects
  }

  transformed parameters {
    real<lower=0> eta;     // standard deviation of the treatment effects
    eta = sigma * sqrt(gt);
  }

  model {
    int pos;
    pos = 1;

    for (i in 1:C) {
      target += normal_lpdf(segment(Y, pos, s[i]) | mu + t[i], sigma);
      pos = pos + s[i];
    }
    target += normal_lpdf(t | 0, eta);

    target += uniform_lpdf(mu | lwr, upr);
    target += -log(sigma);                            // Jeffreys prior
    target += scaled_inv_chi_square_lpdf(gt | 1, ht); // Rouder et al. (2012)
  }"
  
  stanmodelM1 <- stan_model(model_code=stancodeM1)
  
  stancodeM0 <- "
  data {
    int<lower=1> N;        // total number of subjects
    vector[N] Y;           // responses
    real lwr;              // lower bound for the grand mean
    real upr;              // upper bound for the grand mean
  }

  parameters {
    real<lower=lwr, upper=upr> mu;     // grand mean
    real<lower=0> sigma;               // standard deviation of the error
  }

  model {
    target += uniform_lpdf(mu | lwr, upr);
    target += -log(sigma);                            // Jeffreys prior
    target += normal_lpdf(Y | mu, sigma);
  }"
  
  stanmodelM0 <- stan_model(model_code=stancodeM0)
} #Stan models


BFBS <- function(seed=277, diagnostics=F) {
  stanfitM0 <- sampling(stanmodelM0, data=list("N"=length(dat), "Y"=c(dat),
                                               "lwr"=mean(dat)-6*sd(dat),
                                               "upr"=mean(dat)+6*sd(dat)),
                        iter=50000, warmup=10000, chains=4,
                        seed=seed, refresh=0)
  stanfitM1 <- sampling(stanmodelM1, 
                        data=list("N"=length(dat), "C"=ncol(dat), "Y"=c(dat), 
                                  "s"=rep(nrow(dat),ncol(dat)), "ht"=1,
                                  "lwr"=mean(dat)-6*sd(dat), "upr"=mean(dat)+6*sd(dat)),
                        iter=50000, warmup=10000, chains=4,
                        seed=seed, refresh=0)
  
  set.seed(seed)
  M0 <- bridge_sampler(stanfitM0, silent=T)
  
  set.seed(seed)
  M1 <- bridge_sampler(stanfitM1, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(M1$logml - M0$logml),
         "pererr.M1"=error_measures(M1)$cv,
         "pererr.M0"=error_measures(M0)$cv,
         "stan.M1"=stanfitM1, "stan.M0"=stanfitM0)
  } else {
    list("bf"=exp(M1$logml - M0$logml),
         "pererr.M1"=error_measures(M1)$cv,
         "pererr.M0"=error_measures(M0)$cv)
  }
}

BFBS()


# MCMC <- BFBS(diagnostics=T)
# rstan::summary(MCMC$stan.M1)[[1]]
# rstan::summary(MCMC$stan.M0)[[1]]
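Because the bridge sampling estimate is itself a Monte Carlo quantity (Part I flags unreliable estimates), an optional stability check is to rerun BFBS() with a few different seeds (arbitrary choices below) and compare:

# sapply(c(1, 42, 277), function(s) BFBS(seed=s)$bf) # run-to-run variability of the BF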