============================================================================================================
PREVIOUS: weakly informative prior for covariance matrices, https://rpubs.com/sherloconan/1081051
THIS: inverse-Wishart prior for the error covariance matrix, https://rpubs.com/sherloconan/1085223
In Loftus and Masson (1994), to-be-recalled 20-word lists are presented at a rate of 1, 2, or 5 seconds per word. Suppose that this hypothetical experiment is run as a one-way within-subject (repeated-measures) design with three conditions.
dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
13,8,14,23,18,17,1,15,12,9,
13,8,14,25,20,17,4,17,12,12), ncol=3,
dimnames=list(paste0("s",1:10), paste0("Level",1:3))) #wide data format
# dat <- dat * 100
df <- data.frame("ID"=factor(rep(paste0("s",1:10), 3)),
"Level"=factor(rep(paste0("Level",1:3), each=10)),
"Response"=c(dat)) #long data format
Alternatively, test a simulated data set.
mu.b <- 100; sigma.b <- 20; n <- 24
delta.mu <- 6.7; sigma.e <- 6.67 #rho=.9, power=.8
set.seed(277)
TrueScore <- rnorm(n, mu.b, sigma.b)
eij1 <- rnorm(n, 0, sigma.e)
eij2 <- rnorm(n, 0, sigma.e)
dat <- cbind("Level1"=TrueScore+eij1+delta.mu,
"Level2"=TrueScore+eij2)
# dat <- dat * 100
df <- data.frame("ID"=factor(rep(paste0("s",1:n), 2)),
"Level"=factor(rep(c("Level1", "Level2"), each=n)),
"Response"=c(dat))
## The Bayes factors will be computed for the hypothetical data (in the first chunk).
Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.
\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon(d_i+b_j)+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\sigma_\epsilon b_j+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]
\[\begin{equation} \tag{3} d_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]
\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
\[\begin{equation} \tag{5} (d_1^\star,\dotsb,d_{a-1}^\star)=(d_1,\dotsb,d_a)\cdot\mathbf{Q} \end{equation}\]
\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]
\[\begin{equation} \tag{7} b_j\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation}\]
\[\begin{equation} \tag{8} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation}\]
\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in (6). For example, the projected main effect \(d^\star=(d_1-d_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(d_1+d_2=0\)), \(d_1=d^\star/\sqrt{2}\) and \(d_2=-d^\star/\sqrt{2}\).
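As a numerical illustration of the \(a=2\) case (not part of the derivation), the projection can be verified directly:
S2 <- diag(2) - matrix(1, 2, 2)/2        #left side of (6) for a = 2
Q2 <- eigen(S2)$vectors[, 1, drop=FALSE] #unit eigenvector of the nonzero eigenvalue, (1,-1)/sqrt(2) up to sign
d <- c(.5, -.5)                          #any d with d1 + d2 = 0
drop(d %*% Q2)                           #d* = (d1 - d2)/sqrt(2), up to sign
drop(Q2 %*% (d %*% Q2))                  #recovers (d1, d2)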
set.seed(277)
(JZS_BF <- anovaBF(Response~Level+ID, data=df, whichRandom="ID", progress=F))
## Bayes factor analysis
## --------------
## [1] Level + ID : 35959.93 ±0.56%
##
## Against denominator:
## Response ~ ID
## ---
## Bayes factor type: BFlinearModel, JZS
Note: Fixed effects are assumed. See the discussion here.
Markov chain Monte Carlo (MCMC) diagnostics. Plots of iterations versus sampled values for each variable in the chain.
set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
##
## Iterations = 1:1e+05
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mu 12.7292 1.9846 0.0062759 0.0062759
## Level-Level1 -1.6542 0.2364 0.0007475 0.0008613
## Level-Level2 0.2538 0.2241 0.0007088 0.0007100
## Level-Level3 1.4004 0.2330 0.0007367 0.0008187
## ID-s1 -0.7238 2.0379 0.0064445 0.0064445
## ID-s10 -3.0369 2.0370 0.0064415 0.0064415
## ID-s2 -5.3524 2.0380 0.0064446 0.0064446
## ID-s3 0.2681 2.0370 0.0064416 0.0064416
## ID-s4 10.5135 2.0384 0.0064461 0.0064461
## ID-s5 5.2273 2.0393 0.0064489 0.0064489
## ID-s6 3.5731 2.0352 0.0064359 0.0064359
## ID-s7 -10.6347 2.0369 0.0064412 0.0064412
## ID-s8 1.9211 2.0361 0.0064386 0.0064386
## ID-s9 -1.7162 2.0395 0.0064495 0.0064495
## sig2 0.7938 0.3294 0.0010416 0.0023032
## g_Level 7.7913 94.4643 0.2987222 0.2987222
## g_ID 56.2794 39.9360 0.1262886 0.2052537
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mu 8.7772 11.4887 12.7270 13.9666 16.6988
## Level-Level1 -2.1042 -1.8100 -1.6599 -1.5046 -1.1664
## Level-Level2 -0.1907 0.1090 0.2547 0.3998 0.6950
## Level-Level3 0.9238 1.2533 1.4041 1.5527 1.8510
## ID-s1 -4.7946 -2.0006 -0.7140 0.5661 3.3221
## ID-s10 -7.1093 -4.3147 -3.0322 -1.7511 0.9941
## ID-s2 -9.4374 -6.6315 -5.3413 -4.0637 -1.3013
## ID-s3 -3.8028 -1.0187 0.2716 1.5472 4.3035
## ID-s4 6.4649 9.2282 10.5097 11.7944 14.6162
## ID-s5 1.1702 3.9469 5.2212 6.5136 9.2945
## ID-s6 -0.4687 2.2866 3.5639 4.8580 7.6215
## ID-s7 -14.6990 -11.9076 -10.6180 -9.3487 -6.6167
## ID-s8 -2.1454 0.6405 1.9223 3.2007 5.9699
## ID-s9 -5.8027 -3.0012 -1.7110 -0.4266 2.3528
## sig2 0.3849 0.5714 0.7223 0.9325 1.6225
## g_Level 0.5096 1.5649 2.9401 6.0788 36.1203
## g_ID 14.7594 31.1880 46.1816 68.9989 157.4876
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
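Since draws is a coda mcmc object, diagnostics beyond the traceplots are also available; for example (an optional check using the coda package; the subset of parameters is chosen here just for illustration):
library(coda)
mc <- as.mcmc(as.matrix(draws)[, c("mu", "sig2", "g_Level", "g_ID")])
effectiveSize(mc) #effective sample sizes
autocorr.diag(mc) #autocorrelations at selected lags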
\[\begin{align*} \tag{a} \mathcal{M}_1:\ Y_{ij}&=\mu+t_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij}\\ (\epsilon_{1j},\dotsb,\epsilon_{aj})^\top&\overset{\text{i.i.d.}}{\sim}\mathbf{\mathcal{N}}_a(\mathbf{0},\mathbf{\Sigma})\qquad\text{for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{b} \pi(\mu)\propto 1 \end{equation}\]
\[\begin{equation} \tag{c} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{\Psi},\ \nu) \end{equation}\]
\[\begin{equation} \tag{3} t_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]
\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
\[\begin{equation} \tag{5} (t_1^\star,\dotsb,t_{a-1}^\star)=(t_1,\dotsb,t_a)\cdot\mathbf{Q} \end{equation}\]
\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]
\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in (6). For example, the projected main effect \(t^\star=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^\star/\sqrt{2}\) and \(t_2=-t^\star/\sqrt{2}\).
computeQ <- function(C) {
#' reduced parametrization for the fixed treatment effects
S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
e <- qr(S)
Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
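A quick sanity check (illustrative) that computeQ() returns a matrix with the properties stated above, for \(a=3\):
Q <- computeQ(3)
round(crossprod(Q), 3) #t(Q) %*% Q = identity: orthonormal columns
round(Q %*% t(Q), 3)   #equals I - J/3, the left side of (6)
round(colSums(Q), 3)   #zero column sums: columns are contrasts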
Specify the full model \(\mathcal{M}_1\) in (a).
stancodeM1 initially declares the reduced fixed treatment effects tf. Note that the effects are not standardized (\(d_i=t_i/\sigma_\epsilon\)) but the data are scaled. Compile and fit the model in Stan.
\[\begin{equation} \tag{c.1} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{I}_{a},\ a) \end{equation}\]
Note that to compute the (log) marginal likelihood for a Stan model, we need to specify the model in a certain way. Instead of using “~” signs to specify distributions, we must use the (log) density functions directly. The reason is that when the “~” sign is used, constant terms that are not needed for sampling from the posterior are dropped; for computing the marginal likelihood, however, these constants must be retained. For instance, instead of writing y ~ normal(mu, sigma); we would need to write target += normal_lpdf(y | mu, sigma); (Gronau, 2021). See also the discussion regarding the implicit uniform prior for computing the log marginal likelihood.
stancodeM1 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] P; // projecting C-1 fixed effects into C
real<lower=0> ht; // scale parameter of the variance of the reduced treatment effect
matrix[C,C] V; // scale matrix of the inverse-Wishart prior
real<lower=C-1> nu; // degrees of freedom of the inverse-Wishart prior
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
cov_matrix[C] Sigma; // error covariance matrix
real<lower=0> gt; // variance of the reduced treatment effect
vector[C-1] tf; // reduced treatment effect
}
transformed parameters {
vector[C] t; // treatment effect
t = P * tf;
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += inv_wishart_lpdf(Sigma | nu, V);
target += scaled_inv_chi_square_lpdf(gt | 1, ht);
target += normal_lpdf(tf | 0, sqrt(gt));
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | mu + t, Sigma);
}
}"
stanmodelM1 <- stan_model(model_code=stancodeM1)
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat),
"Y"=dat/sd(dat), #scale the data
"P"=computeQ(ncol(dat)), "ht"=.5,
"V"=diag(ncol(dat)), "nu"=ncol(dat))
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]]) #sd of the scaled data is 1, so bounds are the mean -/+ 6
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 2.1556 | 0.0010 | 0.3392 | 1.4795 | 1.9424 | 2.1554 | 2.3696 | 2.8294 | 124133.64 | 1.0001 |
| Sigma[1,1] | 1.2071 | 0.0033 | 0.6838 | 0.4755 | 0.7726 | 1.0367 | 1.4352 | 2.9667 | 43957.39 | 1.0001 |
| Sigma[1,2] | 1.1183 | 0.0033 | 0.6691 | 0.3970 | 0.6940 | 0.9538 | 1.3423 | 2.8355 | 41644.26 | 1.0001 |
| Sigma[1,3] | 1.0995 | 0.0033 | 0.6563 | 0.3891 | 0.6817 | 0.9362 | 1.3205 | 2.7896 | 40607.91 | 1.0000 |
| Sigma[2,1] | 1.1183 | 0.0033 | 0.6691 | 0.3970 | 0.6940 | 0.9538 | 1.3423 | 2.8355 | 41644.26 | 1.0001 |
| Sigma[2,2] | 1.3125 | 0.0035 | 0.7414 | 0.5163 | 0.8420 | 1.1297 | 1.5592 | 3.2194 | 44336.54 | 1.0001 |
| Sigma[2,3] | 1.1400 | 0.0034 | 0.6812 | 0.4014 | 0.7061 | 0.9718 | 1.3688 | 2.8906 | 40970.54 | 1.0001 |
| Sigma[3,1] | 1.0995 | 0.0033 | 0.6563 | 0.3891 | 0.6817 | 0.9362 | 1.3205 | 2.7896 | 40607.91 | 1.0000 |
| Sigma[3,2] | 1.1400 | 0.0034 | 0.6812 | 0.4014 | 0.7061 | 0.9718 | 1.3688 | 2.8906 | 40970.54 | 1.0001 |
| Sigma[3,3] | 1.2695 | 0.0035 | 0.7149 | 0.4986 | 0.8130 | 1.0906 | 1.5090 | 3.1085 | 42421.92 | 1.0001 |
| gt | 0.3779 | 0.0046 | 1.2566 | 0.0397 | 0.0947 | 0.1672 | 0.3329 | 1.8549 | 75394.60 | 1.0000 |
| tf[1] | -0.3293 | 0.0003 | 0.1126 | -0.5474 | -0.4018 | -0.3313 | -0.2590 | -0.0994 | 122646.75 | 1.0000 |
| tf[2] | -0.1304 | 0.0003 | 0.1161 | -0.3589 | -0.2048 | -0.1311 | -0.0573 | 0.1020 | 133501.98 | 1.0000 |
| t[1] | -0.2689 | 0.0003 | 0.0920 | -0.4470 | -0.3281 | -0.2705 | -0.2114 | -0.0812 | 122646.75 | 1.0000 |
| t[2] | 0.0422 | 0.0003 | 0.0936 | -0.1447 | -0.0171 | 0.0422 | 0.1019 | 0.2280 | 133492.43 | 1.0000 |
| t[3] | 0.2266 | 0.0003 | 0.0946 | 0.0345 | 0.1673 | 0.2279 | 0.2873 | 0.4105 | 128015.65 | 1.0001 |
| lp__ | -30.1349 | 0.0126 | 2.6863 | -36.4063 | -31.6730 | -29.7439 | -28.1765 | -26.0760 | 45239.40 | 1.0001 |
mean(dat/sd(dat))
## [1] 2.162076
cov(dat/sd(dat))
## Level1 Level2 Level3
## Level1 0.9674355 0.9994698 0.9834526
## Level2 0.9994698 1.0635384 1.0186903
## Level3 0.9834526 1.0186903 1.0238158
Specify the null model \(\mathcal{M}_0\) in (a). Compile and fit the model in Stan.
\[\begin{equation} \tag{c.1} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{I}_{a},\ a) \end{equation}\]
stancodeM0 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C] V; // scale matrix of the inverse-Wishart prior
real<lower=C-1> nu; // degrees of freedom of the inverse-Wishart prior
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
cov_matrix[C] Sigma; // error covariance matrix
}
transformed parameters {
vector[C] muS; // grand mean vector
for (i in 1:C) {
muS[i] = mu;
}
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += inv_wishart_lpdf(Sigma | nu, V);
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | muS, Sigma);
}
}"
stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)], #drop "P" and "ht", which M0 does not use
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 2.0916 | 0.0021 | 0.4708 | 1.1467 | 1.7947 | 2.0932 | 2.3898 | 3.0293 | 51976.73 | 1.0000 |
| Sigma[1,1] | 1.3772 | 0.0047 | 0.8848 | 0.4950 | 0.8304 | 1.1420 | 1.6348 | 3.6719 | 36053.65 | 1.0001 |
| Sigma[1,2] | 1.2123 | 0.0046 | 0.8140 | 0.3835 | 0.7096 | 1.0013 | 1.4535 | 3.3372 | 31123.22 | 1.0001 |
| Sigma[1,3] | 1.1462 | 0.0046 | 0.8019 | 0.3226 | 0.6526 | 0.9411 | 1.3826 | 3.2372 | 30012.84 | 1.0001 |
| Sigma[2,1] | 1.2123 | 0.0046 | 0.8140 | 0.3835 | 0.7096 | 1.0013 | 1.4535 | 3.3372 | 31123.22 | 1.0001 |
| Sigma[2,2] | 1.4289 | 0.0050 | 0.8828 | 0.5298 | 0.8822 | 1.1992 | 1.6908 | 3.7435 | 30958.54 | 1.0001 |
| Sigma[2,3] | 1.2999 | 0.0052 | 0.8846 | 0.4191 | 0.7578 | 1.0672 | 1.5535 | 3.6134 | 29040.74 | 1.0001 |
| Sigma[3,1] | 1.1462 | 0.0046 | 0.8019 | 0.3226 | 0.6526 | 0.9411 | 1.3826 | 3.2372 | 30012.84 | 1.0001 |
| Sigma[3,2] | 1.2999 | 0.0052 | 0.8846 | 0.4191 | 0.7578 | 1.0672 | 1.5535 | 3.6134 | 29040.74 | 1.0001 |
| Sigma[3,3] | 1.4887 | 0.0056 | 0.9770 | 0.5278 | 0.8920 | 1.2298 | 1.7659 | 4.0336 | 30039.28 | 1.0001 |
| muS[1] | 2.0916 | 0.0021 | 0.4708 | 1.1467 | 1.7947 | 2.0932 | 2.3898 | 3.0293 | 51976.73 | 1.0000 |
| muS[2] | 2.0916 | 0.0021 | 0.4708 | 1.1467 | 1.7947 | 2.0932 | 2.3898 | 3.0293 | 51976.73 | 1.0000 |
| muS[3] | 2.0916 | 0.0021 | 0.4708 | 1.1467 | 1.7947 | 2.0932 | 2.3898 | 3.0293 | 51976.73 | 1.0000 |
| lp__ | -32.4984 | 0.0106 | 2.1629 | -37.7277 | -33.6743 | -32.1213 | -30.9072 | -29.4231 | 41555.65 | 1.0001 |
mean(dat/sd(dat))
## [1] 2.162076
cov(dat/sd(dat))
## Level1 Level2 Level3
## Level1 0.9674355 0.9994698 0.9834526
## Level2 0.9994698 1.0635384 1.0186903
## Level3 0.9834526 1.0186903 1.0238158
The newly proposed models in (a): (i) do not include an explicit subject random-effects term, (ii) allow a more general (unstructured) error covariance, (iii) do not standardize the treatment effects, and (iv) scale the data.
Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -33.39117
##
## Error Measures:
##
## Relative Mean-Squared Error: 6.167597e-06
## Coefficient of Variation: 0.002483465
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -34.77583
##
## Error Measures:
##
## Relative Mean-Squared Error: 4.782054e-06
## Coefficient of Variation: 0.002186791
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 3.99348
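The same value can be recovered by hand from the two log marginal likelihood estimates, which is what bf() computes on the log scale:
exp(M1$logml - M0$logml) #BF10, approximately 3.99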
Do we expect the Bayes factor estimates to vary substantially across the different prior specifications?
\[\begin{equation} \tag{c.2} \mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{I}_{a},\ a+1) \end{equation}\]
datalistM1$nu <- ncol(dat)+1
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 2.1581 | 0.0009 | 0.3216 | 1.5171 | 1.9560 | 2.1580 | 2.3605 | 2.7978 | 126136.07 | 1 |
| Sigma[1,1] | 1.0745 | 0.0028 | 0.5779 | 0.4423 | 0.7069 | 0.9366 | 1.2729 | 2.5101 | 41829.80 | 1 |
| Sigma[1,2] | 0.9953 | 0.0029 | 0.5679 | 0.3724 | 0.6351 | 0.8616 | 1.1906 | 2.4070 | 38494.72 | 1 |
| Sigma[1,3] | 0.9795 | 0.0028 | 0.5597 | 0.3644 | 0.6254 | 0.8467 | 1.1701 | 2.3695 | 38638.07 | 1 |
| Sigma[2,1] | 0.9953 | 0.0029 | 0.5679 | 0.3724 | 0.6351 | 0.8616 | 1.1906 | 2.4070 | 38494.72 | 1 |
| Sigma[2,2] | 1.1689 | 0.0031 | 0.6293 | 0.4796 | 0.7697 | 1.0195 | 1.3851 | 2.7313 | 40341.96 | 1 |
| Sigma[2,3] | 1.0157 | 0.0030 | 0.5829 | 0.3765 | 0.6477 | 0.8786 | 1.2153 | 2.4583 | 37734.75 | 1 |
| Sigma[3,1] | 0.9795 | 0.0028 | 0.5597 | 0.3644 | 0.6254 | 0.8467 | 1.1701 | 2.3695 | 38638.07 | 1 |
| Sigma[3,2] | 1.0157 | 0.0030 | 0.5829 | 0.3765 | 0.6477 | 0.8786 | 1.2153 | 2.4583 | 37734.75 | 1 |
| Sigma[3,3] | 1.1317 | 0.0030 | 0.6109 | 0.4670 | 0.7448 | 0.9861 | 1.3400 | 2.6574 | 40218.21 | 1 |
| gt | 0.4057 | 0.0125 | 3.4701 | 0.0402 | 0.0951 | 0.1673 | 0.3293 | 1.8736 | 76800.65 | 1 |
| tf[1] | -0.3319 | 0.0003 | 0.1078 | -0.5407 | -0.4015 | -0.3339 | -0.2642 | -0.1123 | 126415.15 | 1 |
| tf[2] | -0.1316 | 0.0003 | 0.1107 | -0.3505 | -0.2029 | -0.1322 | -0.0610 | 0.0905 | 136349.99 | 1 |
| t[1] | -0.2710 | 0.0002 | 0.0880 | -0.4415 | -0.3278 | -0.2726 | -0.2157 | -0.0917 | 126415.15 | 1 |
| t[2] | 0.0425 | 0.0002 | 0.0896 | -0.1350 | -0.0147 | 0.0425 | 0.0998 | 0.2208 | 135296.73 | 1 |
| t[3] | 0.2285 | 0.0002 | 0.0900 | 0.0459 | 0.1717 | 0.2298 | 0.2866 | 0.4038 | 132586.92 | 1 |
| lp__ | -28.3448 | 0.0125 | 2.6646 | -34.5977 | -29.8547 | -27.9541 | -26.4085 | -24.3125 | 45432.12 | 1 |
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -32.14397
##
## Error Measures:
##
## Relative Mean-Squared Error: 5.658335e-06
## Coefficient of Variation: 0.002378725
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 2.0921 | 0.0019 | 0.4451 | 1.2085 | 1.8080 | 2.0906 | 2.3757 | 2.9803 | 57175.01 | 1.0001 |
| Sigma[1,1] | 1.2118 | 0.0036 | 0.7212 | 0.4635 | 0.7602 | 1.0254 | 1.4411 | 3.0603 | 41131.14 | 1.0001 |
| Sigma[1,2] | 1.0645 | 0.0035 | 0.6601 | 0.3628 | 0.6487 | 0.8986 | 1.2789 | 2.7620 | 35523.06 | 1.0001 |
| Sigma[1,3] | 1.0052 | 0.0035 | 0.6508 | 0.3078 | 0.5974 | 0.8430 | 1.2166 | 2.6703 | 34258.82 | 1.0001 |
| Sigma[2,1] | 1.0645 | 0.0035 | 0.6601 | 0.3628 | 0.6487 | 0.8986 | 1.2789 | 2.7620 | 35523.06 | 1.0001 |
| Sigma[2,2] | 1.2606 | 0.0038 | 0.7169 | 0.4970 | 0.8060 | 1.0807 | 1.4938 | 3.1079 | 34903.33 | 1.0001 |
| Sigma[2,3] | 1.1447 | 0.0040 | 0.7190 | 0.3946 | 0.6946 | 0.9613 | 1.3684 | 3.0026 | 32854.21 | 1.0001 |
| Sigma[3,1] | 1.0052 | 0.0035 | 0.6508 | 0.3078 | 0.5974 | 0.8430 | 1.2166 | 2.6703 | 34258.82 | 1.0001 |
| Sigma[3,2] | 1.1447 | 0.0040 | 0.7190 | 0.3946 | 0.6946 | 0.9613 | 1.3684 | 3.0026 | 32854.21 | 1.0001 |
| Sigma[3,3] | 1.3147 | 0.0043 | 0.7955 | 0.4943 | 0.8168 | 1.1102 | 1.5576 | 3.3674 | 34106.94 | 1.0001 |
| muS[1] | 2.0921 | 0.0019 | 0.4451 | 1.2085 | 1.8080 | 2.0906 | 2.3757 | 2.9803 | 57175.01 | 1.0001 |
| muS[2] | 2.0921 | 0.0019 | 0.4451 | 1.2085 | 1.8080 | 2.0906 | 2.3757 | 2.9803 | 57175.01 | 1.0001 |
| muS[3] | 2.0921 | 0.0019 | 0.4451 | 1.2085 | 1.8080 | 2.0906 | 2.3757 | 2.9803 | 57175.01 | 1.0001 |
| lp__ | -31.1359 | 0.0101 | 2.1281 | -36.2739 | -32.3107 | -30.7570 | -29.5698 | -28.1179 | 44334.45 | 1.0000 |
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -33.86874
##
## Error Measures:
##
## Relative Mean-Squared Error: 4.222428e-06
## Coefficient of Variation: 0.002054855
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
bf(M1, M0) #Bayes factor
## Estimated Bayes factor in favor of M1 over M0: 5.61123
Do we expect the Bayes factor estimates to vary substantially across the different prior specifications?
\[\begin{equation} \tag{c.3} \mathbf{\Sigma}\sim\mathcal{W}^{-1}\left((\nu_0-a-1)\cdot\mathbf{S},\ \nu_0\right) \end{equation}\]
With \(\mathbf{\Psi}=(\nu_0-a-1)\cdot\mathbf{S}\), the prior mean of \(\mathbf{\Sigma}\) is the sample covariance matrix \(\mathbf{S}\), which in turn anchors the posterior mean near \(\mathbf{S}\). Set \(\nu_0=a+2\), the smallest integer degrees of freedom for which the prior mean exists.
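A quick Monte Carlo check of the prior-mean claim (a sketch using base R's rWishart(); it relies on the fact that if \(\mathbf{\Sigma}\sim\mathcal{W}^{-1}(\mathbf{\Psi},\nu_0)\) then \(\mathbf{\Sigma}^{-1}\sim\mathcal{W}(\mathbf{\Psi}^{-1},\nu_0)\)):
set.seed(277)
a <- ncol(dat); S <- cov(dat/sd(dat)); nu0 <- a + 2
W <- rWishart(1e5, df=nu0, Sigma=solve((nu0-a-1)*S)) #W ~ Wishart, so solve(W) ~ inverse-Wishart
IW.mean <- matrix(rowMeans(apply(W, 3, solve)), a, a)
round(IW.mean, 2) #Monte Carlo average of the Sigma draws
round(S, 2)       #prior mean Psi/(nu0-a-1) = S; convergence is slow because the prior is heavy-tailed at this small nu0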
datalistM1$nu <- ncol(dat)+2
datalistM1$V <- cov(datalistM1$Y)
#Increase MCMC iterations
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
iter=100000, warmup=50000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 2.1554 | 0.0009 | 0.3172 | 1.5260 | 1.9530 | 2.1560 | 2.3573 | 2.7843 | 132500.25 | 1 |
| Sigma[1,1] | 0.9660 | 0.0024 | 0.4832 | 0.4132 | 0.6507 | 0.8512 | 1.1450 | 2.1848 | 41076.62 | 1 |
| Sigma[1,2] | 0.9981 | 0.0025 | 0.5026 | 0.4233 | 0.6695 | 0.8788 | 1.1845 | 2.2688 | 40854.22 | 1 |
| Sigma[1,3] | 0.9821 | 0.0024 | 0.4938 | 0.4171 | 0.6594 | 0.8647 | 1.1654 | 2.2311 | 40921.06 | 1 |
| Sigma[2,1] | 0.9981 | 0.0025 | 0.5026 | 0.4233 | 0.6695 | 0.8788 | 1.1845 | 2.2688 | 40854.22 | 1 |
| Sigma[2,2] | 1.0622 | 0.0026 | 0.5311 | 0.4551 | 0.7150 | 0.9369 | 1.2594 | 2.3986 | 41391.03 | 1 |
| Sigma[2,3] | 1.0174 | 0.0025 | 0.5145 | 0.4284 | 0.6809 | 0.8958 | 1.2095 | 2.3148 | 40757.09 | 1 |
| Sigma[3,1] | 0.9821 | 0.0024 | 0.4938 | 0.4171 | 0.6594 | 0.8647 | 1.1654 | 2.2311 | 40921.06 | 1 |
| Sigma[3,2] | 1.0174 | 0.0025 | 0.5145 | 0.4284 | 0.6809 | 0.8958 | 1.2095 | 2.3148 | 40757.09 | 1 |
| Sigma[3,3] | 1.0224 | 0.0025 | 0.5115 | 0.4386 | 0.6883 | 0.9013 | 1.2127 | 2.3179 | 41386.71 | 1 |
| gt | 0.4049 | 0.0202 | 5.6790 | 0.0428 | 0.0972 | 0.1701 | 0.3304 | 1.8563 | 78704.34 | 1 |
| tf[1] | -0.3578 | 0.0001 | 0.0322 | -0.4213 | -0.3784 | -0.3580 | -0.3376 | -0.2929 | 127203.98 | 1 |
| tf[2] | -0.1419 | 0.0001 | 0.0493 | -0.2396 | -0.1736 | -0.1421 | -0.1106 | -0.0430 | 137760.47 | 1 |
| t[1] | -0.2921 | 0.0001 | 0.0263 | -0.3440 | -0.3089 | -0.2923 | -0.2756 | -0.2392 | 127203.98 | 1 |
| t[2] | 0.0457 | 0.0001 | 0.0389 | -0.0318 | 0.0209 | 0.0457 | 0.0705 | 0.1235 | 125636.31 | 1 |
| t[3] | 0.2464 | 0.0001 | 0.0356 | 0.1750 | 0.2238 | 0.2466 | 0.2692 | 0.3163 | 151258.06 | 1 |
| lp__ | -12.7991 | 0.0111 | 2.6414 | -18.9579 | -14.3152 | -12.4200 | -10.8702 | -8.7938 | 56123.04 | 1 |
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -22.00005
##
## Error Measures:
##
## Relative Mean-Squared Error: 6.926012e-06
## Coefficient of Variation: 0.002631732
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
iter=100000, warmup=50000, chains=4, seed=277, refresh=0)
## Warning: There were 597 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 1.2708 | 0.0101 | 1.0836 | -0.8580 | 0.5610 | 1.2634 | 1.9662 | 3.4554 | 11426.67 | 1.0005 |
| Sigma[1,1] | 2.2607 | 0.0207 | 2.3525 | 0.4940 | 0.9240 | 1.4711 | 2.6465 | 8.8663 | 12938.85 | 1.0005 |
| Sigma[1,2] | 2.4739 | 0.0231 | 2.6002 | 0.4917 | 0.9678 | 1.5928 | 2.9433 | 9.7780 | 12643.87 | 1.0005 |
| Sigma[1,3] | 2.5700 | 0.0247 | 2.7581 | 0.4532 | 0.9522 | 1.6301 | 3.0989 | 10.3053 | 12479.94 | 1.0005 |
| Sigma[2,1] | 2.4739 | 0.0231 | 2.6002 | 0.4917 | 0.9678 | 1.5928 | 2.9433 | 9.7780 | 12643.87 | 1.0005 |
| Sigma[2,2] | 2.8208 | 0.0258 | 2.8994 | 0.5648 | 1.1041 | 1.8394 | 3.4041 | 10.9745 | 12627.28 | 1.0005 |
| Sigma[2,3] | 2.9535 | 0.0275 | 3.0785 | 0.5387 | 1.1056 | 1.9110 | 3.6132 | 11.5865 | 12546.63 | 1.0005 |
| Sigma[3,1] | 2.5700 | 0.0247 | 2.7581 | 0.4532 | 0.9522 | 1.6301 | 3.0989 | 10.3053 | 12479.94 | 1.0005 |
| Sigma[3,2] | 2.9535 | 0.0275 | 3.0785 | 0.5387 | 1.1056 | 1.9110 | 3.6132 | 11.5865 | 12546.63 | 1.0005 |
| Sigma[3,3] | 3.1694 | 0.0293 | 3.2757 | 0.5659 | 1.1726 | 2.0648 | 3.9136 | 12.3552 | 12532.56 | 1.0005 |
| muS[1] | 1.2708 | 0.0101 | 1.0836 | -0.8580 | 0.5610 | 1.2634 | 1.9662 | 3.4554 | 11426.67 | 1.0005 |
| muS[2] | 1.2708 | 0.0101 | 1.0836 | -0.8580 | 0.5610 | 1.2634 | 1.9662 | 3.4554 | 11426.67 | 1.0005 |
| muS[3] | 1.2708 | 0.0101 | 1.0836 | -0.8580 | 0.5610 | 1.2634 | 1.9662 | 3.4554 | 11426.67 | 1.0005 |
| lp__ | -31.0991 | 0.0100 | 2.0858 | -36.1437 | -32.2181 | -30.7279 | -29.5740 | -28.1581 | 43122.11 | 1.0001 |
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -35.18453
##
## Error Measures:
##
## Relative Mean-Squared Error: 2.31987e-05
## Coefficient of Variation: 0.004816503
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
bf(M1, M0) #Bayes factor
## Estimated Bayes factor in favor of M1 over M0: 532041.14440
Do we expect the Bayes factor estimates to vary substantially across the different prior specifications?

Finally, the complete code for this analysis is collected below.
library(rstan)
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(BayesFactor)
run.this <- TRUE #if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
13,8,14,23,18,17,1,15,12,9,
13,8,14,25,20,17,4,17,12,12), ncol=3,
dimnames=list(paste0("s",1:10), paste0("Level",1:3))) #wide data format
df <- data.frame("ID"=factor(rep(paste0("s",1:10), 3)),
"Level"=factor(rep(paste0("Level",1:3), each=10)),
"Response"=c(dat)) #long data format
} else {
mu.b <- 100; sigma.b <- 20; n <- 24
delta.mu <- 6.7; sigma.e <- 6.67 #rho=.9, power=.8
set.seed(277)
TrueScore <- rnorm(n, mu.b, sigma.b)
eij1 <- rnorm(n, 0, sigma.e)
eij2 <- rnorm(n, 0, sigma.e)
dat <- cbind("Level1" = TrueScore + eij1 + delta.mu,
"Level2" = TrueScore + eij2)
df <- data.frame("ID" = factor(rep(paste0("s",1:n), 2)),
"Level" = factor(rep(c("Level1", "Level2"), each = n)),
"Response" = as.vector(dat))
}
set.seed(277)
(JZS_BF <- anovaBF(Response~Level+ID, data=df, whichRandom="ID", progress=F))
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
computeQ <- function(C) {
#' reduced parametrization for the fixed treatment effects
S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
e <- qr(S)
Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
{
stancodeM1 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] P; // projecting C-1 fixed effects into C
real<lower=0> ht; // scale parameter of the variance of the reduced treatment effect
matrix[C,C] V; // scale matrix of the inverse-Wishart prior
real<lower=C-1> nu; // degrees of freedom of the inverse-Wishart prior
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
cov_matrix[C] Sigma; // error covariance matrix
real<lower=0> gt; // variance of the reduced treatment effect
vector[C-1] tf; // reduced treatment effect
}
transformed parameters {
vector[C] t; // treatment effect
t = P * tf;
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += inv_wishart_lpdf(Sigma | nu, V);
target += scaled_inv_chi_square_lpdf(gt | 1, ht);
target += normal_lpdf(tf | 0, sqrt(gt));
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | mu + t, Sigma);
}
}"
stanmodelM1 <- stan_model(model_code=stancodeM1)
stancodeM0 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C] V; // scale matrix of the inverse-Wishart prior
real<lower=C-1> nu; // degrees of freedom of the inverse-Wishart prior
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
cov_matrix[C] Sigma; // error covariance matrix
}
transformed parameters {
vector[C] muS; // grand mean vector
for (i in 1:C) {
muS[i] = mu;
}
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += inv_wishart_lpdf(Sigma | nu, V);
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | muS, Sigma);
}
}"
stanmodelM0 <- stan_model(model_code=stancodeM0)
} #Stan models
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat),
"Y"=dat/sd(dat), #scale the data
"P"=computeQ(ncol(dat)), "ht"=.5,
"V"=diag(ncol(dat)), "nu"=ncol(dat)) # nu += 1
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]])
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])
BFBS <- function(seed=277, diagnostics=F) {
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
iter=50000, warmup=10000, chains=4, seed=seed, refresh=0)
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
iter=50000, warmup=10000, chains=4, seed=seed, refresh=0)
set.seed(seed)
M0 <- bridge_sampler(stanfitM0, silent=T)
set.seed(seed)
M1 <- bridge_sampler(stanfitM1, silent=T)
if (diagnostics) {
list("bf"=exp(M1$logml - M0$logml),
"pererr.M1"=error_measures(M1)$cv,
"pererr.M0"=error_measures(M0)$cv,
"stan.M1"=stanfitM1, "stan.M0"=stanfitM0)
} else {
list("bf"=exp(M1$logml - M0$logml),
"pererr.M1"=error_measures(M1)$cv,
"pererr.M0"=error_measures(M0)$cv)
}
}
BFBS()
# MCMC <- BFBS(diag=T)
# rstan::summary(MCMC$stan.M1)[[1]]
# rstan::summary(MCMC$stan.M0)[[1]]
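To gauge the Monte Carlo variability of the Bayes factor estimate, one could rerun BFBS() under a few seeds and inspect the spread (a sketch; each call refits both models, so this is slow):
# sapply(c(277, 278, 279), function(s) BFBS(seed=s)$bf)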