============================================================================================================
PREVIOUS: weakly informative prior for covariance matrices, https://rpubs.com/sherloconan/1081051

THIS: inverse-Wishart prior for the error covariance matrix, https://rpubs.com/sherloconan/1085223

1. Getting Started

 

In Loftus and Masson (1994), to-be-recalled 20-word lists were presented at a rate of 1, 2, or 5 sec per word. Suppose that the hypothetical experiment is run as a one-way within-subject (repeated-measures) design with three conditions.

dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                13,8,14,23,18,17,1,15,12,9,
                13,8,14,25,20,17,4,17,12,12), ncol=3,
              dimnames=list(paste0("s",1:10), paste0("Level",1:3))) #wide data format
# dat <- dat * 100

df <- data.frame("ID"=factor(rep(paste0("s",1:10), 3)),
                 "Level"=factor(rep(paste0("Level",1:3), each=10)),
                 "Response"=c(dat)) #long data format

 

Alternatively, test a simulated data set.

mu.b <- 100; sigma.b <- 20; n <- 24
delta.mu <- 6.7; sigma.e <- 6.67 #rho=.9, power=.8
set.seed(277)
TrueScore <- rnorm(n, mu.b, sigma.b)
eij1 <- rnorm(n, 0, sigma.e)
eij2 <- rnorm(n, 0, sigma.e)

dat <- cbind("Level1"=TrueScore+eij1+delta.mu,
             "Level2"=TrueScore+eij2)
# dat <- dat * 100

df <- data.frame("ID"=factor(rep(paste0("s",1:n), 2)),
                 "Level"=factor(rep(c("Level1", "Level2"), each=n)),
                 "Response"=c(dat))
## The Bayes factors will be computed for the hypothetical data (in the first chunk).
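
As a sanity check on the rho=.9 annotation above: under this true-score model, the implied correlation between conditions is \(\rho=\sigma_b^2/(\sigma_b^2+\sigma_\epsilon^2)\). A quick check (an added sketch, not part of the original analysis):

sigma.b^2 / (sigma.b^2 + sigma.e^2) #implied correlation, approximately .90
cor(dat[,"Level1"], dat[,"Level2"]) #empirical correlation in the simulated sample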

 

Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon(d_i+b_j)+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\sigma_\epsilon b_j+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]

\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]

\[\begin{equation} \tag{3} d_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

\[\begin{equation} \tag{5} (d_1^\star,\dotsb,d_{a-1}^\star)=(d_1,\dotsb,d_a)\cdot\mathbf{Q} \end{equation}\]

\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]

\[\begin{equation} \tag{7} b_j\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation}\]

\[\begin{equation} \tag{8} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation}\]

\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of (6). For example, the projected main effect \(d^\star=(d_1-d_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(d_1+d_2=0\)), \(d_1=d^\star/\sqrt{2}\) and \(d_2=-d^\star/\sqrt{2}\).
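
For \(a=2\), the relation (6) can be verified numerically (an added check, not part of the original analysis):

Q <- matrix(c(1, -1)/sqrt(2), ncol=1) #unit eigenvector of I_2 - J_2/2
Q %*% t(Q)                            #reproduces I_2 - J_2/2 in (6)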

set.seed(277)
(JZS_BF <- anovaBF(Response~Level+ID, data=df, whichRandom="ID", progress=F))
## Bayes factor analysis
## --------------
## [1] Level + ID : 35959.93 ±0.56%
## 
## Against denominator:
##   Response ~ ID 
## ---
## Bayes factor type: BFlinearModel, JZS
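
If the numeric value is needed downstream, it can be extracted from the BayesFactor object (a convenience sketch):

extractBF(JZS_BF)$bf #the value printed above, about 3.6e4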

Note: Fixed effects are assumed. See the discussion here.

 

Markov chain Monte Carlo (MCMC) diagnostics: trace plots of iterations versus sampled values for each variable in the chain.

set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
## 
## Iterations = 1:1e+05
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1e+05 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                  Mean      SD  Naive SE Time-series SE
## mu            12.7292  1.9846 0.0062759      0.0062759
## Level-Level1  -1.6542  0.2364 0.0007475      0.0008613
## Level-Level2   0.2538  0.2241 0.0007088      0.0007100
## Level-Level3   1.4004  0.2330 0.0007367      0.0008187
## ID-s1         -0.7238  2.0379 0.0064445      0.0064445
## ID-s10        -3.0369  2.0370 0.0064415      0.0064415
## ID-s2         -5.3524  2.0380 0.0064446      0.0064446
## ID-s3          0.2681  2.0370 0.0064416      0.0064416
## ID-s4         10.5135  2.0384 0.0064461      0.0064461
## ID-s5          5.2273  2.0393 0.0064489      0.0064489
## ID-s6          3.5731  2.0352 0.0064359      0.0064359
## ID-s7        -10.6347  2.0369 0.0064412      0.0064412
## ID-s8          1.9211  2.0361 0.0064386      0.0064386
## ID-s9         -1.7162  2.0395 0.0064495      0.0064495
## sig2           0.7938  0.3294 0.0010416      0.0023032
## g_Level        7.7913 94.4643 0.2987222      0.2987222
## g_ID          56.2794 39.9360 0.1262886      0.2052537
## 
## 2. Quantiles for each variable:
## 
##                  2.5%      25%      50%     75%    97.5%
## mu             8.7772  11.4887  12.7270 13.9666  16.6988
## Level-Level1  -2.1042  -1.8100  -1.6599 -1.5046  -1.1664
## Level-Level2  -0.1907   0.1090   0.2547  0.3998   0.6950
## Level-Level3   0.9238   1.2533   1.4041  1.5527   1.8510
## ID-s1         -4.7946  -2.0006  -0.7140  0.5661   3.3221
## ID-s10        -7.1093  -4.3147  -3.0322 -1.7511   0.9941
## ID-s2         -9.4374  -6.6315  -5.3413 -4.0637  -1.3013
## ID-s3         -3.8028  -1.0187   0.2716  1.5472   4.3035
## ID-s4          6.4649   9.2282  10.5097 11.7944  14.6162
## ID-s5          1.1702   3.9469   5.2212  6.5136   9.2945
## ID-s6         -0.4687   2.2866   3.5639  4.8580   7.6215
## ID-s7        -14.6990 -11.9076 -10.6180 -9.3487  -6.6167
## ID-s8         -2.1454   0.6405   1.9223  3.2007   5.9699
## ID-s9         -5.8027  -3.0012  -1.7110 -0.4266   2.3528
## sig2           0.3849   0.5714   0.7223  0.9325   1.6225
## g_Level        0.5096   1.5649   2.9401  6.0788  36.1203
## g_ID          14.7594  31.1880  46.1816 68.9989 157.4876
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)

   

2A. Full Stan Model (unstandardized effect sizes)

 

\[\begin{align*} \tag{a} \mathcal{M}_1:\ Y_{ij}&=\mu+t_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij}\\ (\epsilon_{1j},\dotsb,\epsilon_{aj})^\top&\overset{\text{i.i.d.}}{\sim}\mathbf{\mathcal{N}}_a(\mathbf{0},\mathbf{\Sigma})\qquad\text{for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]

\[\begin{equation} \tag{b} \pi(\mu)\propto 1 \end{equation}\]

\[\begin{equation} \tag{c} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{\Psi},\ \nu) \end{equation}\]

\[\begin{equation} \tag{3} t_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

\[\begin{equation} \tag{5} (t_1^\star,\dotsb,t_{a-1}^\star)=(t_1,\dotsb,t_a)\cdot\mathbf{Q} \end{equation}\]

\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]

\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of (6). For example, the projected main effect \(t^\star=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^\star/\sqrt{2}\) and \(t_2=-t^\star/\sqrt{2}\).

computeQ <- function(C) {
  #' reduced parametrization for the fixed treatment effects
  S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
  e <- qr(S)
  Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
  Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
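
A quick numeric check of computeQ (a usage sketch with \(a=3\)):

Q <- computeQ(3)
round(t(Q) %*% Q, 10) #identity of order 2: the columns are orthonormal
round(Q %*% t(Q), 10) #recovers I_3 - J_3/3, as required by (6)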

 

Specify the full model \(\mathcal{M}_1\) in (a). stancodeM1 declares the reduced fixed treatment effects tf. Note that the effects are not standardized (\(d_i=t_i/\sigma_\epsilon\)), but the data are scaled. Compile and fit the model in Stan.

\[\begin{equation} \tag{c.1} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{I}_{a},\ a) \end{equation}\]

Note that to compute the (log) marginal likelihood for a Stan model, we need to specify the model in a particular way. Instead of using “~” signs to specify distributions, we directly use the (log) density functions. The reason is that when the “~” sign is used, constant terms that are not needed for sampling from the posterior are dropped. For computing the marginal likelihood, however, these constants must be retained. For instance, instead of writing y ~ normal(mu, sigma); we would need to write target += normal_lpdf(y | mu, sigma); (Gronau, 2021). See also the discussion regarding the implicit uniform prior for computing the log marginal likelihood.

stancodeM1 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  matrix[C,C-1] P;       // projecting C-1 fixed effects into C
  real<lower=0> ht;      // scale parameter of the variance of the reduced treatment effect
  matrix[C,C] V;         // scale matrix of the inverse-Wishart prior
  real<lower=C-1> nu;    // degrees of freedom of the inverse-Wishart prior
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real<lower=lwr, upper=upr> mu; // grand mean
  cov_matrix[C] Sigma;           // error covariance matrix
  real<lower=0> gt;              // variance of the reduced treatment effect
  vector[C-1] tf;                // reduced treatment effect
}

transformed parameters {
  vector[C] t;           // treatment effect
  t = P * tf;
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += inv_wishart_lpdf(Sigma | nu, V);
  target += scaled_inv_chi_square_lpdf(gt | 1, ht);
  target += normal_lpdf(tf | 0, sqrt(gt));
  for (k in 1:N) {
    target += multi_normal_lpdf(Y[k] | mu + t, Sigma);
  }
}"

stanmodelM1 <- stan_model(model_code=stancodeM1)
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat),
                   "Y"=dat/sd(dat), #scale the data
                   "P"=computeQ(ncol(dat)), "ht"=.5,
                   "V"=diag(ncol(dat)), "nu"=ncol(dat))
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]]) #sd(Y)=1 after scaling, so this is mean(Y)-6
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
               mean se_mean     sd     2.5%      25%      50%      75%    97.5%      n_eff   Rhat
mu           2.1556  0.0010 0.3392   1.4795   1.9424   2.1554   2.3696   2.8294  124133.64 1.0001
Sigma[1,1]   1.2071  0.0033 0.6838   0.4755   0.7726   1.0367   1.4352   2.9667   43957.39 1.0001
Sigma[1,2]   1.1183  0.0033 0.6691   0.3970   0.6940   0.9538   1.3423   2.8355   41644.26 1.0001
Sigma[1,3]   1.0995  0.0033 0.6563   0.3891   0.6817   0.9362   1.3205   2.7896   40607.91 1.0000
Sigma[2,1]   1.1183  0.0033 0.6691   0.3970   0.6940   0.9538   1.3423   2.8355   41644.26 1.0001
Sigma[2,2]   1.3125  0.0035 0.7414   0.5163   0.8420   1.1297   1.5592   3.2194   44336.54 1.0001
Sigma[2,3]   1.1400  0.0034 0.6812   0.4014   0.7061   0.9718   1.3688   2.8906   40970.54 1.0001
Sigma[3,1]   1.0995  0.0033 0.6563   0.3891   0.6817   0.9362   1.3205   2.7896   40607.91 1.0000
Sigma[3,2]   1.1400  0.0034 0.6812   0.4014   0.7061   0.9718   1.3688   2.8906   40970.54 1.0001
Sigma[3,3]   1.2695  0.0035 0.7149   0.4986   0.8130   1.0906   1.5090   3.1085   42421.92 1.0001
gt           0.3779  0.0046 1.2566   0.0397   0.0947   0.1672   0.3329   1.8549   75394.60 1.0000
tf[1]       -0.3293  0.0003 0.1126  -0.5474  -0.4018  -0.3313  -0.2590  -0.0994  122646.75 1.0000
tf[2]       -0.1304  0.0003 0.1161  -0.3589  -0.2048  -0.1311  -0.0573   0.1020  133501.98 1.0000
t[1]        -0.2689  0.0003 0.0920  -0.4470  -0.3281  -0.2705  -0.2114  -0.0812  122646.75 1.0000
t[2]         0.0422  0.0003 0.0936  -0.1447  -0.0171   0.0422   0.1019   0.2280  133492.43 1.0000
t[3]         0.2266  0.0003 0.0946   0.0345   0.1673   0.2279   0.2873   0.4105  128015.65 1.0001
lp__       -30.1349  0.0126 2.6863 -36.4063 -31.6730 -29.7439 -28.1765 -26.0760   45239.40 1.0001
mean(dat/sd(dat))
## [1] 2.162076
cov(dat/sd(dat))
##           Level1    Level2    Level3
## Level1 0.9674355 0.9994698 0.9834526
## Level2 0.9994698 1.0635384 1.0186903
## Level3 0.9834526 1.0186903 1.0238158

   

2B. Null Stan Model

 

\[\begin{align*} \tag{a} \mathcal{M}_1:\ Y_{ij}&=\mu+t_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij}\\ (\epsilon_{1j},\dotsb,\epsilon_{aj})^\top&\overset{\text{i.i.d.}}{\sim}\mathbf{\mathcal{N}}_a(\mathbf{0},\mathbf{\Sigma})\qquad\text{for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]

\[\begin{equation} \tag{b} \pi(\mu)\propto 1 \end{equation}\]

\[\begin{equation} \tag{c} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{\Psi},\ \nu) \end{equation}\]

\[\begin{equation} \tag{3} t_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]

\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]

\[\begin{equation} \tag{5} (t_1^\star,\dotsb,t_{a-1}^\star)=(t_1,\dotsb,t_a)\cdot\mathbf{Q} \end{equation}\]

\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]

\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of (6). For example, the projected main effect \(t^\star=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^\star/\sqrt{2}\) and \(t_2=-t^\star/\sqrt{2}\).

 

Specify the null model \(\mathcal{M}_0\) in (a). Compile and fit the model in Stan.

\[\begin{equation} \tag{c.1} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{I}_{a},\ a) \end{equation}\]

stancodeM0 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  matrix[C,C] V;         // scale matrix of the inverse-Wishart prior
  real<lower=C-1> nu;    // degrees of freedom of the inverse-Wishart prior
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real<lower=lwr, upper=upr> mu; // grand mean
  cov_matrix[C] Sigma;           // error covariance matrix
}

transformed parameters {
  vector[C] muS;         // grand mean vector
  for (i in 1:C) {
    muS[i] = mu;
  }
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += inv_wishart_lpdf(Sigma | nu, V);
  for (k in 1:N) {
    target += multi_normal_lpdf(Y[k] | muS, Sigma);
  }
}"

stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)], #drop list elements 4-5 (P and ht), unused by M0
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
               mean se_mean     sd     2.5%      25%      50%      75%    97.5%     n_eff   Rhat
mu           2.0916  0.0021 0.4708   1.1467   1.7947   2.0932   2.3898   3.0293  51976.73 1.0000
Sigma[1,1]   1.3772  0.0047 0.8848   0.4950   0.8304   1.1420   1.6348   3.6719  36053.65 1.0001
Sigma[1,2]   1.2123  0.0046 0.8140   0.3835   0.7096   1.0013   1.4535   3.3372  31123.22 1.0001
Sigma[1,3]   1.1462  0.0046 0.8019   0.3226   0.6526   0.9411   1.3826   3.2372  30012.84 1.0001
Sigma[2,1]   1.2123  0.0046 0.8140   0.3835   0.7096   1.0013   1.4535   3.3372  31123.22 1.0001
Sigma[2,2]   1.4289  0.0050 0.8828   0.5298   0.8822   1.1992   1.6908   3.7435  30958.54 1.0001
Sigma[2,3]   1.2999  0.0052 0.8846   0.4191   0.7578   1.0672   1.5535   3.6134  29040.74 1.0001
Sigma[3,1]   1.1462  0.0046 0.8019   0.3226   0.6526   0.9411   1.3826   3.2372  30012.84 1.0001
Sigma[3,2]   1.2999  0.0052 0.8846   0.4191   0.7578   1.0672   1.5535   3.6134  29040.74 1.0001
Sigma[3,3]   1.4887  0.0056 0.9770   0.5278   0.8920   1.2298   1.7659   4.0336  30039.28 1.0001
muS[1]       2.0916  0.0021 0.4708   1.1467   1.7947   2.0932   2.3898   3.0293  51976.73 1.0000
muS[2]       2.0916  0.0021 0.4708   1.1467   1.7947   2.0932   2.3898   3.0293  51976.73 1.0000
muS[3]       2.0916  0.0021 0.4708   1.1467   1.7947   2.0932   2.3898   3.0293  51976.73 1.0000
lp__       -32.4984  0.0106 2.1629 -37.7277 -33.6743 -32.1213 -30.9072 -29.4231  41555.65 1.0001
mean(dat/sd(dat))
## [1] 2.162076
cov(dat/sd(dat))
##           Level1    Level2    Level3
## Level1 0.9674355 0.9994698 0.9834526
## Level2 0.9994698 1.0635384 1.0186903
## Level3 0.9834526 1.0186903 1.0238158

   

3. Bayes Factor via Bridge Sampling

 

The newly proposed models in (a): (i) do not include an explicit subject random-effects term, (ii) introduce a more general structure for the error term, (iii) do not standardize the treatment effects, and (iv) scale the data.

 

Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -33.39117
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 6.167597e-06
##  Coefficient of Variation: 0.002483465
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -34.77583
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 4.782054e-06
##  Coefficient of Variation: 0.002186791
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 3.99348
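
Equivalently, since each bridge sampling object stores its log marginal likelihood in $logml, the Bayes factor can be computed directly (this identity is used by the BFBS() helper in the all-in-one script below):

exp(M1$logml - M0$logml) #same value as bf(M1, M0)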

 

Do we expect the Bayes factor estimates to vary significantly despite different model specifications?

   

4. \(\nu\) += 1

 

\[\begin{equation} \tag{c.2} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{I}_{a},\ a+1) \end{equation}\]

datalistM1$nu <- ncol(dat)+1

stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
               mean se_mean     sd     2.5%      25%      50%      75%    97.5%      n_eff Rhat
mu           2.1581  0.0009 0.3216   1.5171   1.9560   2.1580   2.3605   2.7978  126136.07    1
Sigma[1,1]   1.0745  0.0028 0.5779   0.4423   0.7069   0.9366   1.2729   2.5101   41829.80    1
Sigma[1,2]   0.9953  0.0029 0.5679   0.3724   0.6351   0.8616   1.1906   2.4070   38494.72    1
Sigma[1,3]   0.9795  0.0028 0.5597   0.3644   0.6254   0.8467   1.1701   2.3695   38638.07    1
Sigma[2,1]   0.9953  0.0029 0.5679   0.3724   0.6351   0.8616   1.1906   2.4070   38494.72    1
Sigma[2,2]   1.1689  0.0031 0.6293   0.4796   0.7697   1.0195   1.3851   2.7313   40341.96    1
Sigma[2,3]   1.0157  0.0030 0.5829   0.3765   0.6477   0.8786   1.2153   2.4583   37734.75    1
Sigma[3,1]   0.9795  0.0028 0.5597   0.3644   0.6254   0.8467   1.1701   2.3695   38638.07    1
Sigma[3,2]   1.0157  0.0030 0.5829   0.3765   0.6477   0.8786   1.2153   2.4583   37734.75    1
Sigma[3,3]   1.1317  0.0030 0.6109   0.4670   0.7448   0.9861   1.3400   2.6574   40218.21    1
gt           0.4057  0.0125 3.4701   0.0402   0.0951   0.1673   0.3293   1.8736   76800.65    1
tf[1]       -0.3319  0.0003 0.1078  -0.5407  -0.4015  -0.3339  -0.2642  -0.1123  126415.15    1
tf[2]       -0.1316  0.0003 0.1107  -0.3505  -0.2029  -0.1322  -0.0610   0.0905  136349.99    1
t[1]        -0.2710  0.0002 0.0880  -0.4415  -0.3278  -0.2726  -0.2157  -0.0917  126415.15    1
t[2]         0.0425  0.0002 0.0896  -0.1350  -0.0147   0.0425   0.0998   0.2208  135296.73    1
t[3]         0.2285  0.0002 0.0900   0.0459   0.1717   0.2298   0.2866   0.4038  132586.92    1
lp__       -28.3448  0.0125 2.6646 -34.5977 -29.8547 -27.9541 -26.4085 -24.3125   45432.12    1
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -32.14397
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 5.658335e-06
##  Coefficient of Variation: 0.002378725
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
               mean se_mean     sd     2.5%      25%      50%      75%    97.5%     n_eff   Rhat
mu           2.0921  0.0019 0.4451   1.2085   1.8080   2.0906   2.3757   2.9803  57175.01 1.0001
Sigma[1,1]   1.2118  0.0036 0.7212   0.4635   0.7602   1.0254   1.4411   3.0603  41131.14 1.0001
Sigma[1,2]   1.0645  0.0035 0.6601   0.3628   0.6487   0.8986   1.2789   2.7620  35523.06 1.0001
Sigma[1,3]   1.0052  0.0035 0.6508   0.3078   0.5974   0.8430   1.2166   2.6703  34258.82 1.0001
Sigma[2,1]   1.0645  0.0035 0.6601   0.3628   0.6487   0.8986   1.2789   2.7620  35523.06 1.0001
Sigma[2,2]   1.2606  0.0038 0.7169   0.4970   0.8060   1.0807   1.4938   3.1079  34903.33 1.0001
Sigma[2,3]   1.1447  0.0040 0.7190   0.3946   0.6946   0.9613   1.3684   3.0026  32854.21 1.0001
Sigma[3,1]   1.0052  0.0035 0.6508   0.3078   0.5974   0.8430   1.2166   2.6703  34258.82 1.0001
Sigma[3,2]   1.1447  0.0040 0.7190   0.3946   0.6946   0.9613   1.3684   3.0026  32854.21 1.0001
Sigma[3,3]   1.3147  0.0043 0.7955   0.4943   0.8168   1.1102   1.5576   3.3674  34106.94 1.0001
muS[1]       2.0921  0.0019 0.4451   1.2085   1.8080   2.0906   2.3757   2.9803  57175.01 1.0001
muS[2]       2.0921  0.0019 0.4451   1.2085   1.8080   2.0906   2.3757   2.9803  57175.01 1.0001
muS[3]       2.0921  0.0019 0.4451   1.2085   1.8080   2.0906   2.3757   2.9803  57175.01 1.0001
lp__       -31.1359  0.0101 2.1281 -36.2739 -32.3107 -30.7570 -29.5698 -28.1179  44334.45 1.0000
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -33.86874
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 4.222428e-06
##  Coefficient of Variation: 0.002054855
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
bf(M1, M0) #Bayes factor
## Estimated Bayes factor in favor of M1 over M0: 5.61123

 

Do we expect the Bayes factor estimates to vary significantly despite different model specifications?

   

5. Prior and Posterior Means

 

\[\begin{equation} \tag{c.3} \mathbf{\Sigma}\sim\mathcal{W}^{-1}\left((\nu_0-a-1)\cdot\mathbf{S},\ \nu_0\right) \end{equation}\]

Center the prior on the sample covariance matrix \(\mathbf{S}\): with scale matrix \(\mathbf{\Psi}=(\nu_0-a-1)\cdot\mathbf{S}\), the prior mean of the inverse-Wishart, \(\mathbf{\Psi}/(\nu_0-a-1)\), equals \(\mathbf{S}\), and the posterior mean is likewise pulled toward the sample covariance matrix. Set \(\nu_0=a+2\), the smallest integer degrees of freedom for which the prior mean exists.
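
A minimal Monte Carlo check of the prior-mean identity \(\mathbb{E}[\mathbf{\Sigma}]=\mathbf{\Psi}/(\nu_0-a-1)\) (an added sketch using base R's rWishart; it uses a larger \(\nu_0\) than \(a+2\) so that the elementwise variances are finite and the average stabilizes):

a <- ncol(dat); S <- cov(dat/sd(dat))
nu0 <- a + 20
Psi <- (nu0 - a - 1) * S
#if W ~ Wishart(nu0, solve(Psi)), then solve(W) ~ Inv-Wishart(Psi, nu0)
W <- rWishart(5e4, nu0, solve(Psi))
round(matrix(rowMeans(apply(W, 3, solve)), a, a) - S, 2) #approximately the zero matrix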

datalistM1$nu <- ncol(dat)+2
datalistM1$V <- cov(datalistM1$Y)

#Increase MCMC iterations
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
                      iter=100000, warmup=50000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
               mean se_mean     sd     2.5%      25%      50%      75%    97.5%      n_eff Rhat
mu           2.1554  0.0009 0.3172   1.5260   1.9530   2.1560   2.3573   2.7843  132500.25    1
Sigma[1,1]   0.9660  0.0024 0.4832   0.4132   0.6507   0.8512   1.1450   2.1848   41076.62    1
Sigma[1,2]   0.9981  0.0025 0.5026   0.4233   0.6695   0.8788   1.1845   2.2688   40854.22    1
Sigma[1,3]   0.9821  0.0024 0.4938   0.4171   0.6594   0.8647   1.1654   2.2311   40921.06    1
Sigma[2,1]   0.9981  0.0025 0.5026   0.4233   0.6695   0.8788   1.1845   2.2688   40854.22    1
Sigma[2,2]   1.0622  0.0026 0.5311   0.4551   0.7150   0.9369   1.2594   2.3986   41391.03    1
Sigma[2,3]   1.0174  0.0025 0.5145   0.4284   0.6809   0.8958   1.2095   2.3148   40757.09    1
Sigma[3,1]   0.9821  0.0024 0.4938   0.4171   0.6594   0.8647   1.1654   2.2311   40921.06    1
Sigma[3,2]   1.0174  0.0025 0.5145   0.4284   0.6809   0.8958   1.2095   2.3148   40757.09    1
Sigma[3,3]   1.0224  0.0025 0.5115   0.4386   0.6883   0.9013   1.2127   2.3179   41386.71    1
gt           0.4049  0.0202 5.6790   0.0428   0.0972   0.1701   0.3304   1.8563   78704.34    1
tf[1]       -0.3578  0.0001 0.0322  -0.4213  -0.3784  -0.3580  -0.3376  -0.2929  127203.98    1
tf[2]       -0.1419  0.0001 0.0493  -0.2396  -0.1736  -0.1421  -0.1106  -0.0430  137760.47    1
t[1]        -0.2921  0.0001 0.0263  -0.3440  -0.3089  -0.2923  -0.2756  -0.2392  127203.98    1
t[2]         0.0457  0.0001 0.0389  -0.0318   0.0209   0.0457   0.0705   0.1235  125636.31    1
t[3]         0.2464  0.0001 0.0356   0.1750   0.2238   0.2466   0.2692   0.3163  151258.06    1
lp__       -12.7991  0.0111 2.6414 -18.9579 -14.3152 -12.4200 -10.8702  -8.7938   56123.04    1
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -22.00005
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 6.926012e-06
##  Coefficient of Variation: 0.002631732
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
                      iter=100000, warmup=50000, chains=4, seed=277, refresh=0)
## Warning: There were 597 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
               mean se_mean     sd     2.5%      25%      50%      75%    97.5%     n_eff   Rhat
mu           1.2708  0.0101 1.0836  -0.8580   0.5610   1.2634   1.9662   3.4554  11426.67 1.0005
Sigma[1,1]   2.2607  0.0207 2.3525   0.4940   0.9240   1.4711   2.6465   8.8663  12938.85 1.0005
Sigma[1,2]   2.4739  0.0231 2.6002   0.4917   0.9678   1.5928   2.9433   9.7780  12643.87 1.0005
Sigma[1,3]   2.5700  0.0247 2.7581   0.4532   0.9522   1.6301   3.0989  10.3053  12479.94 1.0005
Sigma[2,1]   2.4739  0.0231 2.6002   0.4917   0.9678   1.5928   2.9433   9.7780  12643.87 1.0005
Sigma[2,2]   2.8208  0.0258 2.8994   0.5648   1.1041   1.8394   3.4041  10.9745  12627.28 1.0005
Sigma[2,3]   2.9535  0.0275 3.0785   0.5387   1.1056   1.9110   3.6132  11.5865  12546.63 1.0005
Sigma[3,1]   2.5700  0.0247 2.7581   0.4532   0.9522   1.6301   3.0989  10.3053  12479.94 1.0005
Sigma[3,2]   2.9535  0.0275 3.0785   0.5387   1.1056   1.9110   3.6132  11.5865  12546.63 1.0005
Sigma[3,3]   3.1694  0.0293 3.2757   0.5659   1.1726   2.0648   3.9136  12.3552  12532.56 1.0005
muS[1]       1.2708  0.0101 1.0836  -0.8580   0.5610   1.2634   1.9662   3.4554  11426.67 1.0005
muS[2]       1.2708  0.0101 1.0836  -0.8580   0.5610   1.2634   1.9662   3.4554  11426.67 1.0005
muS[3]       1.2708  0.0101 1.0836  -0.8580   0.5610   1.2634   1.9662   3.4554  11426.67 1.0005
lp__       -31.0991  0.0100 2.0858 -36.1437 -32.2181 -30.7279 -29.5740 -28.1581  43122.11 1.0001
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -35.18453
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 2.31987e-05
##  Coefficient of Variation: 0.004816503
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
bf(M1, M0) #Bayes factor
## Estimated Bayes factor in favor of M1 over M0: 532041.14440

 

Do we expect the Bayes factor estimates to vary significantly despite different model specifications?

   

All-in-One R Script

 

library(rstan)
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(BayesFactor)

run.this <- TRUE #if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
  dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                  13,8,14,23,18,17,1,15,12,9,
                  13,8,14,25,20,17,4,17,12,12), ncol=3,
                dimnames=list(paste0("s",1:10), paste0("Level",1:3))) #wide data format

  df <- data.frame("ID"=factor(rep(paste0("s",1:10), 3)),
                   "Level"=factor(rep(paste0("Level",1:3), each=10)),
                   "Response"=c(dat)) #long data format
} else {
  mu.b <- 100; sigma.b <- 20; n <- 24
  delta.mu <- 6.7; sigma.e <- 6.67 #rho=.9, power=.8
  set.seed(277)
  TrueScore <- rnorm(n, mu.b, sigma.b)
  eij1 <- rnorm(n, 0, sigma.e)
  eij2 <- rnorm(n, 0, sigma.e)

  dat <- cbind("Level1" = TrueScore + eij1 + delta.mu,
               "Level2" = TrueScore + eij2)

  df <- data.frame("ID" = factor(rep(paste0("s",1:n), 2)),
                   "Level" = factor(rep(c("Level1", "Level2"), each = n)),
                   "Response" = as.vector(dat))
}

set.seed(277)
(JZS_BF <- anovaBF(Response~Level+ID, data=df, whichRandom="ID", progress=F))
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)


computeQ <- function(C) {
  #' reduced parametrization for the fixed treatment effects
  S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
  e <- qr(S)
  Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
  Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}

{
  stancodeM1 <- "
  data {
    int<lower=1> N;        // number of subjects
    int<lower=2> C;        // number of conditions
    // vector[C] Y[N];        // responses [removed features]
    array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
    matrix[C,C-1] P;       // projecting C-1 fixed effects into C
    real<lower=0> ht;      // scale parameter of the variance of the reduced treatment effect
    matrix[C,C] V;         // scale matrix of the inverse-Wishart prior
    real<lower=C-1> nu;    // degrees of freedom of the inverse-Wishart prior
    real lwr;              // lower bound for the grand mean
    real upr;              // upper bound for the grand mean
  }

  parameters {
    real<lower=lwr, upper=upr> mu; // grand mean
    cov_matrix[C] Sigma;           // error covariance matrix
    real<lower=0> gt;              // variance of the reduced treatment effect
    vector[C-1] tf;                // reduced treatment effect
  }

  transformed parameters {
    vector[C] t;           // treatment effect
    t = P * tf;
  }

  model {
    target += uniform_lpdf(mu | lwr, upr);
    target += inv_wishart_lpdf(Sigma | nu, V);
    target += scaled_inv_chi_square_lpdf(gt | 1, ht);
    target += normal_lpdf(tf | 0, sqrt(gt));
    for (k in 1:N) {
      target += multi_normal_lpdf(Y[k] | mu + t, Sigma);
    }
  }"

  stanmodelM1 <- stan_model(model_code=stancodeM1)

  stancodeM0 <- "
  data {
    int<lower=1> N;        // number of subjects
    int<lower=2> C;        // number of conditions
    // vector[C] Y[N];        // responses [removed features]
    array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
    matrix[C,C] V;         // scale matrix of the inverse-Wishart prior
    real<lower=C-1> nu;    // degrees of freedom of the inverse-Wishart prior
    real lwr;              // lower bound for the grand mean
    real upr;              // upper bound for the grand mean
  }

  parameters {
    real<lower=lwr, upper=upr> mu; // grand mean
    cov_matrix[C] Sigma;           // error covariance matrix
  }

  transformed parameters {
    vector[C] muS;         // grand mean vector
    for (i in 1:C) {
      muS[i] = mu;
    }
  }

  model {
    target += uniform_lpdf(mu | lwr, upr);
    target += inv_wishart_lpdf(Sigma | nu, V);
    for (k in 1:N) {
      target += multi_normal_lpdf(Y[k] | muS, Sigma);
    }
  }"

  stanmodelM0 <- stan_model(model_code=stancodeM0)
} #Stan models

datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat),
                   "Y"=dat/sd(dat), #scale the data
                   "P"=computeQ(ncol(dat)), "ht"=.5,
                   "V"=diag(ncol(dat)), "nu"=ncol(dat)) # nu += 1
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]])
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])


BFBS <- function(seed=277, diagnostics=F) {
  stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
                        iter=50000, warmup=10000, chains=4, seed=seed, refresh=0)

  stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
                        iter=50000, warmup=10000, chains=4, seed=seed, refresh=0)
  
  set.seed(seed)
  M0 <- bridge_sampler(stanfitM0, silent=T)
  
  set.seed(seed)
  M1 <- bridge_sampler(stanfitM1, silent=T)
  
  if (diagnostics) {
    list("bf"=exp(M1$logml - M0$logml),
         "pererr.M1"=error_measures(M1)$cv,
         "pererr.M0"=error_measures(M0)$cv,
         "stan.M1"=stanfitM1, "stan.M0"=stanfitM0)
  } else {
    list("bf"=exp(M1$logml - M0$logml),
         "pererr.M1"=error_measures(M1)$cv,
         "pererr.M0"=error_measures(M0)$cv)
  }
}

BFBS()
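
# To gauge Monte Carlo variability of the Bayes factor estimate across seeds
# (an added sketch; each call refits both models, so it is slow):
# sapply(c(1, 42, 277), function(s) BFBS(seed=s)$bf)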


# MCMC <- BFBS(diagnostics=T)
# rstan::summary(MCMC$stan.M1)[[1]]
# rstan::summary(MCMC$stan.M0)[[1]]