============================================================================================================
Part I: matrix data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1018077 (unreliable estimates)
Part II: ragged data structure, ‘R2jags’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021087
Part III: matrix data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1021097
Part IV: ragged data structure, ‘rstan’, & ‘bridgesampling’, https://rpubs.com/sherloconan/1022575
Part V: modeling fixed effects, https://rpubs.com/sherloconan/1070217
THIS: heteroscedastic error covariance matrix, https://rpubs.com/sherloconan/1075410
In Loftus and Masson (1994), some to-be-recalled 20-word lists are presented at a rate of 1, 2, or 5 sec per word. Suppose that the hypothetical experiment is run as a balanced one-way between-subjects design with three conditions.
dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
13,8,14,23,18,17,1,15,12,9,
13,8,14,25,20,17,4,17,12,12), ncol=3,
dimnames=list(NULL, paste0("Level",1:3))) #wide data format
# dat <- dat * 100
df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
"Response"=c(dat)) #long data format
Or, testing a simulated data set.
m1 <- 100; sd <- 20; n <- 24
m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
set.seed(277)
dat <- matrix(c(rnorm(n, m1, sd),
rnorm(n, m2, sd)), ncol=2,
dimnames=list(NULL, paste0("Level",1:2)))
# dat <- dat * 100
df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
"Response"=c(dat))
## The Bayes factors will be computed for the simulated data (in the second chunk).
Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.
\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]
\[\begin{equation} \tag{3} d_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]
\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
\[\begin{equation} \tag{5} (d_1^\star,\dotsb,d_{a-1}^\star)=(d_1,\dotsb,d_a)\cdot\mathbf{Q} \end{equation}\]
\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]
\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in (6). For example, the projected main effect \(d^\star=(d_1-d_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(d_1+d_2=0\)), \(d_1=d^\star/\sqrt{2}\) and \(d_2=-d^\star/\sqrt{2}\).
(JZS_BF <- anovaBF(Response~Level, data=df, progress=F))
## Bayes factor analysis
## --------------
## [1] Level : 17.5748 ±0%
##
## Against denominator:
## Intercept only
## ---
## Bayes factor type: BFlinearModel, JZS
ttestBF(x=df$Response[df$Level=="Level1"], y=df$Response[df$Level=="Level2"]) #Same estimates when a=2
## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 17.5748 ±0%
##
## Against denominator:
## Null, mu1-mu2 = 0
## ---
## Bayes factor type: BFindepSample, JZS
Note: Fixed effects are assumed. See the discussion here.
The current BayesFactor package does not allow for the heteroscedasticity.
Markov chain Monte Carlo (MCMC) diagnostics. Plots of iterations versus sampled values for each variable in the chain.
set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
##
## Iterations = 1:1e+05
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mu 95.152 3.137 0.009919 0.009899
## Level-Level1 8.871 3.150 0.009962 0.011177
## Level-Level2 -8.871 3.150 0.009962 0.011177
## sig2 475.052 103.102 0.326037 0.341818
## g_Level 2.961 50.387 0.159337 0.159337
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mu 88.98314 93.0632 95.1624 97.233 101.331
## Level-Level1 2.73197 6.7557 8.8379 10.978 15.102
## Level-Level2 -15.10248 -10.9780 -8.8379 -6.756 -2.732
## sig2 314.51113 401.8004 461.5993 532.429 716.150
## g_Level 0.06614 0.2058 0.4338 1.084 12.514
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
\[\begin{align*} \tag{a} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij}\\ \sigma&=\left(\prod_{i=1}^a\sigma_i\right)^{\frac{1}{a}}\\ \epsilon_{ij}&\overset{\text{ind.}}{\sim}\mathcal{N}(0,\sigma_i^2)\qquad\text{for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{b} \pi(\mu)\propto 1 \end{equation}\]
\[\begin{equation} \tag{c} \sigma_i^2\overset{\text{i.i.d.}}{\sim}\text{Inv-gamma}(\alpha,\beta) \end{equation}\]
\[\begin{equation} \tag{3} d_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]
\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
\[\begin{equation} \tag{5} (d_1^\star,\dotsb,d_{a-1}^\star)=(d_1,\dotsb,d_a)\cdot\mathbf{Q} \end{equation}\]
\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]
\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in (6). For example, the projected main effect \(d^\star=(d_1-d_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(d_1+d_2=0\)), \(d_1=d^\star/\sqrt{2}\) and \(d_2=-d^\star/\sqrt{2}\).
computeQ <- function(C) {
#' reduced parametrization for the fixed treatment effects
S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
e <- qr(S)
Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
Specify the full model \(\mathcal{M}_1\) in (a).
stancodeM1 initially declares the reduced (fixed) treatment
effects tf (i.e., \(\sigma
d_{i}^\star\)). Compile and fit the model in Stan.
Note that to compute the (log) marginal likelihood
for a Stan model, we need to specify the model in a certain way. Instead
of using “~” signs for specifying distributions, we need to directly use
the (log) density functions. The reason for this is that when using the
“~” sign, constant terms are dropped which are not needed for sampling
from the posterior. However, for computing the marginal likelihood,
these constants need to be retained. For instance, instead of writing
y ~ normal(mu, sigma); we would need to write
target += normal_lpdf(y | mu, sigma); (Gronau,
2021). See also the discussion regarding the implicit
uniform prior for computing the log marginal likelihood.
stancodeM1 <- "
data {
int<lower=1> N; // number of subjects in the balanced group
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] P; // projecting C-1 fixed effects into C
real<lower=0> ht; // scale parameter of the variance of the standardized reduced treatment effect
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
real<lower=0> shape; // shape parameter of the inverse-gamma distribution
real<lower=0> scale; // scale parameter of the inverse-gamma distribution
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
vector<lower=0>[C] diag; // (heteroscedastic) diagonal of the error covariance matrix
real<lower=0> gt; // variance of the standardized reduced treatment effect
vector[C-1] tf; // reduced treatment effect
}
transformed parameters {
cov_matrix[C] Sigma; // error covariance matrix
real multiplier; // (square root) geometric mean of the determinant of the error covariance matrix
real<lower=0> eta; // standard deviation of the reduced treatment effect
vector[C] t; // treatment effect
for (i in 1:C) {
for (j in 1:C) {
if (i == j) {
Sigma[i,j] = diag[i];
} else {
Sigma[i,j] = 0;
}
}
}
multiplier = pow(determinant(Sigma), 1/(2*C));
eta = multiplier * sqrt(gt);
t = P * tf;
}
model {
target += uniform_lpdf(mu | lwr, upr);
for (i in 1:C) {
target += inv_gamma_lpdf(diag[i] | shape, scale);
}
target += scaled_inv_chi_square_lpdf(gt | 1, ht);
target += normal_lpdf(tf | 0, eta);
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | mu + t, Sigma);
}
}"
stanmodelM1 <- stan_model(model_code=stancodeM1)
stanfitM1 <- sampling(stanmodelM1, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"P"=computeQ(ncol(dat)), "ht"=.5,
"lwr"=mean(dat)-6*sd(dat),
"upr"=mean(dat)+6*sd(dat),
"shape"=2, "scale"=4),
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
## Warning: There were 14639 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat | |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 94.6839 | 0.0360 | 3.2196 | 88.2568 | 92.6052 | 94.7025 | 96.8195 | 100.9947 | 7978.744 | 1.0009 |
| diag[1] | 488.7818 | 1.6706 | 157.0704 | 273.7136 | 379.6661 | 458.1697 | 563.2776 | 877.9111 | 8839.333 | 1.0011 |
| diag[2] | 395.0577 | 1.4060 | 124.1892 | 218.6451 | 308.1683 | 372.2229 | 457.1979 | 697.7886 | 7801.713 | 1.0008 |
| gt | 522.2068 | 37.6258 | 11537.1900 | 0.1137 | 5.3850 | 45.1040 | 154.3278 | 2074.7501 | 94021.594 | 1.0000 |
| tf[1] | 8.3375 | 0.0826 | 5.9969 | -0.4292 | 2.4984 | 8.8247 | 12.9584 | 19.3607 | 5272.530 | 1.0005 |
| Sigma[1,1] | 488.7818 | 1.6706 | 157.0704 | 273.7136 | 379.6661 | 458.1697 | 563.2776 | 877.9111 | 8839.333 | 1.0011 |
| Sigma[1,2] | 0.0000 | NaN | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,1] | 0.0000 | NaN | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,2] | 395.0577 | 1.4060 | 124.1892 | 218.6451 | 308.1683 | 372.2229 | 457.1979 | 697.7886 | 7801.713 | 1.0008 |
| multiplier | 1.0000 | NaN | 0.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | NaN | NaN |
| eta | 10.5792 | 0.1150 | 20.2556 | 0.3372 | 2.3206 | 6.7160 | 12.4229 | 45.5494 | 31031.113 | 1.0000 |
| t[1] | 5.8955 | 0.0584 | 4.2404 | -0.3035 | 1.7667 | 6.2400 | 9.1630 | 13.6901 | 5272.530 | 1.0005 |
| t[2] | -5.8955 | 0.0584 | 4.2404 | -13.6901 | -9.1630 | -6.2400 | -1.7667 | 0.3035 | 5272.530 | 1.0005 |
| lp__ | -243.9056 | 0.0121 | 1.5991 | -247.8675 | -244.7068 | -243.5971 | -242.7608 | -241.7159 | 17576.382 | 1.0000 |
\[\begin{align*} \tag{a} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma d_i+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\epsilon_{ij}\\ \sigma&=\left(\prod_{i=1}^a\sigma_i\right)^{\frac{1}{a}}\\ \epsilon_{ij}&\overset{\text{ind.}}{\sim}\mathcal{N}(0,\sigma_i^2)\qquad\text{for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{b} \pi(\mu)\propto 1 \end{equation}\]
\[\begin{equation} \tag{c} \sigma_i^2\overset{\text{i.i.d.}}{\sim}\text{Inv-gamma}(\alpha,\ \beta) \end{equation}\]
\[\begin{equation} \tag{3} d_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]
\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
\[\begin{equation} \tag{5} (d_1^\star,\dotsb,d_{a-1}^\star)=(d_1,\dotsb,d_a)\cdot\mathbf{Q} \end{equation}\]
\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]
\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in (6). For example, the projected main effect \(d^\star=(d_1-d_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(d_1+d_2=0\)), \(d_1=d^\star/\sqrt{2}\) and \(d_2=-d^\star/\sqrt{2}\).
Specify the null model \(\mathcal{M}_0\) in (a). Compile and fit the model in Stan.
stancodeM0 <- "
data {
int<lower=1> N; // number of subjects in the balanced group
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
real<lower=0> shape; // shape parameter of the inverse-gamma distribution
real<lower=0> scale; // scale parameter of the inverse-gamma distribution
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
vector<lower=0>[C] diag; // (heteroscedastic) diagonal of the error covariance matrix
}
transformed parameters {
vector[C] muS; // grand mean vector
cov_matrix[C] Sigma; // error covariance matrix
for (i in 1:C) {
muS[i] = mu;
for (j in 1:C) {
if (i == j) {
Sigma[i,j] = diag[i];
} else {
Sigma[i,j] = 0;
}
}
}
}
model {
target += uniform_lpdf(mu | lwr, upr);
for (i in 1:C) {
target += inv_gamma_lpdf(diag[i] | shape, scale);
}
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | muS, Sigma);
}
}"
stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"lwr"=mean(dat)-6*sd(dat),
"upr"=mean(dat)+6*sd(dat),
"shape"=2, "scale"=4),
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat | |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 94.0230 | 0.0127 | 3.8496 | 86.6074 | 91.3915 | 93.9863 | 96.6051 | 101.6985 | 92207.20 | 1 |
| diag[1] | 567.8389 | 0.6105 | 183.5351 | 309.0799 | 439.0416 | 534.7213 | 660.2613 | 1015.9509 | 90387.32 | 1 |
| diag[2] | 448.7456 | 0.4947 | 146.0000 | 244.4481 | 346.0317 | 422.1048 | 522.2550 | 805.2479 | 87112.25 | 1 |
| muS[1] | 94.0230 | 0.0127 | 3.8496 | 86.6074 | 91.3915 | 93.9863 | 96.6051 | 101.6985 | 92207.20 | 1 |
| muS[2] | 94.0230 | 0.0127 | 3.8496 | 86.6074 | 91.3915 | 93.9863 | 96.6051 | 101.6985 | 92207.20 | 1 |
| Sigma[1,1] | 567.8389 | 0.6105 | 183.5351 | 309.0799 | 439.0416 | 534.7213 | 660.2613 | 1015.9509 | 90387.32 | 1 |
| Sigma[1,2] | 0.0000 | NaN | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,1] | 0.0000 | NaN | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,2] | 448.7456 | 0.4947 | 146.0000 | 244.4481 | 346.0317 | 422.1048 | 522.2550 | 805.2479 | 87112.25 | 1 |
| lp__ | -240.8893 | 0.0045 | 1.2266 | -244.0580 | -241.4453 | -240.5720 | -239.9942 | -239.4942 | 73151.15 | 1 |
Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -240.777
##
## Error Measures:
##
## Relative Mean-Squared Error: 1.334952e-05
## Coefficient of Variation: 0.003653699
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -242.1539
##
## Error Measures:
##
## Relative Mean-Squared Error: 1.385761e-07
## Coefficient of Variation: 0.000372258
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 3.96248
Do we expect the Bayes factor estimates to vary significantly despite different model specifications?
Check the Monte Carlo error of estimating the Bayes factor.
library(rstan)
# devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling)
library(BayesFactor)
run.this=F #if TRUE, test the hypothetical data; if FALSE, test the simulated data
if (run.this) {
dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
13,8,14,23,18,17,1,15,12,9,
13,8,14,25,20,17,4,17,12,12), ncol=3,
dimnames=list(NULL, paste0("Level",1:3))) #wide data format
df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
"Response"=c(dat)) #long data format
} else {
m1 <- 100; sd <- 20; n <- 24
m2 <- m1 - pwr::pwr.t.test(n=n, power=.8)$d * sd #83.47737
set.seed(277)
dat <- matrix(c(rnorm(n, m1, sd),
rnorm(n, m2, sd)), ncol=2,
dimnames=list(NULL, paste0("Level",1:2)))
df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
"Response"=c(dat))
}
(JZS_BF <- anovaBF(Response~Level, data=df, progress=F))
# ttestBF(x=df$Response[df$Level=="Level1"], y=df$Response[df$Level=="Level2"]) #Same estimates when a=2
# set.seed(277)
# draws <- posterior(JZS_BF, iterations=1e5, progress=F)
# summary(draws)
# plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
computeQ <- function(C) {
#' reduced parametrization for the fixed treatment effects
S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
e <- qr(S)
Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
{
stancodeM1 <- "
data {
int<lower=1> N; // number of subjects in the balanced group
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] P; // projecting C-1 fixed effects into C
real<lower=0> ht; // scale parameter of the variance of the standardized reduced treatment effect
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
real<lower=0> shape; // shape parameter of the inverse-gamma distribution
real<lower=0> scale; // scale parameter of the inverse-gamma distribution
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
vector<lower=0>[C] diag; // (heteroscedastic) diagonal of the error covariance matrix
real<lower=0> gt; // variance of the standardized reduced treatment effect
vector[C-1] tf; // reduced treatment effect
}
transformed parameters {
cov_matrix[C] Sigma; // error covariance matrix
real multiplier; // (square root) geometric mean of the determinant of the error covariance matrix
real<lower=0> eta; // standard deviation of the reduced treatment effect
vector[C] t; // treatment effect
for (i in 1:C) {
for (j in 1:C) {
if (i == j) {
Sigma[i,j] = diag[i];
} else {
Sigma[i,j] = 0;
}
}
}
multiplier = pow(determinant(Sigma), 1/(2*C));
eta = multiplier * sqrt(gt);
t = P * tf;
}
model {
target += uniform_lpdf(mu | lwr, upr);
for (i in 1:C) {
target += inv_gamma_lpdf(diag[i] | shape, scale);
}
target += scaled_inv_chi_square_lpdf(gt | 1, ht);
target += normal_lpdf(tf | 0, eta);
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | mu + t, Sigma);
}
}"
stanmodelM1 <- stan_model(model_code=stancodeM1)
stancodeM0 <- "
data {
int<lower=1> N; // number of subjects in the balanced group
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
real<lower=0> shape; // shape parameter of the inverse-gamma distribution
real<lower=0> scale; // scale parameter of the inverse-gamma distribution
}
parameters {
real<lower=lwr, upper=upr> mu; // grand mean
vector<lower=0>[C] diag; // (heteroscedastic) diagonal of the error covariance matrix
}
transformed parameters {
vector[C] muS; // grand mean vector
cov_matrix[C] Sigma; // error covariance matrix
for (i in 1:C) {
muS[i] = mu;
for (j in 1:C) {
if (i == j) {
Sigma[i,j] = diag[i];
} else {
Sigma[i,j] = 0;
}
}
}
}
model {
target += uniform_lpdf(mu | lwr, upr);
for (i in 1:C) {
target += inv_gamma_lpdf(diag[i] | shape, scale);
}
for (k in 1:N) {
target += multi_normal_lpdf(Y[k] | muS, Sigma);
}
}"
stanmodelM0 <- stan_model(model_code=stancodeM0)
} #Stan models
BFBS <- function(seed=277, diagnostics=F) {
stanfitM0 <- sampling(stanmodelM0, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"lwr"=mean(dat)-6*sd(dat),
"upr"=mean(dat)+6*sd(dat),
"shape"=2, "scale"=4),
iter=50000, warmup=10000, chains=4,
seed=seed, refresh=0)
stanfitM1 <- sampling(stanmodelM1, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"P"=computeQ(ncol(dat)), "ht"=.5,
"lwr"=mean(dat)-6*sd(dat),
"upr"=mean(dat)+6*sd(dat),
"shape"=2, "scale"=4),
iter=50000, warmup=10000, chains=4,
seed=seed, refresh=0)
set.seed(seed)
M0 <- bridge_sampler(stanfitM0, silent=T)
set.seed(seed)
M1 <- bridge_sampler(stanfitM1, silent=T)
if (diagnostics) {
list("bf"=exp(M1$logml - M0$logml),
"pererr.M1"=error_measures(M1)$cv,
"pererr.M0"=error_measures(M0)$cv,
"stan.M1"=stanfitM1, "stan.M0"=stanfitM0)
} else {
list("bf"=exp(M1$logml - M0$logml),
"pererr.M1"=error_measures(M1)$cv,
"pererr.M0"=error_measures(M0)$cv)
}
}
BFBS()
# MCMC <- BFBS(diag=T)
# rstan::summary(MCMC$stan.M1)[[1]]
# rstan::summary(MCMC$stan.M0)[[1]]
All the responses multiplied 100 times.
if (run.this) { #hypothetical
dat <- dat * 100
df <- data.frame("Level"=factor(rep(paste0("Level",1:3), each=10)),
"Response"=c(dat))
} else { #simulated
dat <- dat * 100
df <- data.frame("Level"=factor(rep(paste0("Level",1:2), each=n)),
"Response"=c(dat))
}
## The Bayes factors will be computed for the simulated data (in the else-statement).
(JZS_BF <- anovaBF(Response~Level, data=df, progress=F))
## Bayes factor analysis
## --------------
## [1] Level : 17.5748 ±0%
##
## Against denominator:
## Intercept only
## ---
## Bayes factor type: BFlinearModel, JZS
Compute log marginal likelihoods for full and null models via bridge sampling.
stanfitM1 <- sampling(stanmodelM1, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"P"=computeQ(ncol(dat)), "ht"=.5,
"lwr"=mean(dat)-6*sd(dat),
"upr"=mean(dat)+6*sd(dat),
"shape"=2, "scale"=4),
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
## Warning: There were 33333 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 39761 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is NA, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat | |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 9429.8173 | 33.9247 | 3.830340e+02 | 8697.0224 | 9163.9212 | 9451.8070 | 9723.8106 | 10115.8070 | 127.4801 | 1.0448 |
| diag[1] | 5616592.3597 | 49176.7285 | 1.713643e+06 | 3105948.5321 | 4470268.9618 | 5372867.1404 | 6422784.6461 | 9816784.5991 | 1214.2876 | 1.0037 |
| diag[2] | 4467136.7768 | 59543.2046 | 1.386564e+06 | 2464669.0464 | 3444583.5976 | 4296804.8624 | 5176662.4959 | 8048450.7370 | 542.2702 | 1.0139 |
| gt | 584979.0072 | 205498.3995 | 5.295281e+07 | 0.0660 | 0.3032 | 1.0350 | 14.9799 | 1175764.6212 | 66398.9412 | 1.0000 |
| tf[1] | 69.1015 | 20.2283 | 2.906114e+02 | -8.7450 | -0.4820 | 0.0906 | 0.7747 | 1226.1587 | 206.3986 | 1.0309 |
| Sigma[1,1] | 5616592.3597 | 49176.7285 | 1.713643e+06 | 3105948.5321 | 4470268.9618 | 5372867.1404 | 6422784.6461 | 9816784.5991 | 1214.2876 | 1.0037 |
| Sigma[1,2] | 0.0000 | NaN | 0.000000e+00 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,1] | 0.0000 | NaN | 0.000000e+00 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,2] | 4467136.7768 | 59543.2046 | 1.386564e+06 | 2464669.0464 | 3444583.5976 | 4296804.8624 | 5176662.4959 | 8048450.7370 | 542.2702 | 1.0139 |
| multiplier | 1.0000 | NaN | 0.000000e+00 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | NaN | NaN |
| eta | 89.5764 | 23.6750 | 7.595780e+02 | 0.2568 | 0.5507 | 1.0174 | 3.8704 | 1084.3268 | 1029.3563 | 1.0068 |
| t[1] | 48.8621 | 14.3036 | 2.054933e+02 | -6.1836 | -0.3408 | 0.0640 | 0.5478 | 867.0251 | 206.3986 | 1.0309 |
| t[2] | -48.8621 | 14.3036 | 2.054933e+02 | -867.0251 | -0.5478 | -0.0640 | 0.3408 | 6.1836 | 206.3986 | 1.0309 |
| lp__ | -503.0797 | 0.6465 | 3.275900e+00 | -511.4753 | -504.6003 | -502.0218 | -500.7886 | -499.3219 | 25.6750 | 1.1322 |
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -500.1496
##
## Error Measures:
##
## Relative Mean-Squared Error: 0.0006050576
## Coefficient of Variation: 0.02459792
## Percentage Error: 2%
##
## Note:
## All error measures are approximate.
stanfitM0 <- sampling(stanmodelM0, data=list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"lwr"=mean(dat)-6*sd(dat),
"upr"=mean(dat)+6*sd(dat),
"shape"=2, "scale"=4),
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat | |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 9401.5185 | 1.2985 | 385.7014 | 8658.754 | 9139.7188 | 9396.9651 | 9659.4482 | 10168.1836 | 88225.75 | 1 |
| diag[1] | 5682134.8383 | 6163.1284 | 1833320.0479 | 3084006.300 | 4388400.4801 | 5360435.6307 | 6609031.9027 | 10147366.1800 | 88485.91 | 1 |
| diag[2] | 4479675.7258 | 4979.0985 | 1465407.0596 | 2433121.908 | 3448663.9611 | 4211305.8577 | 5206848.0345 | 8126664.8959 | 86619.39 | 1 |
| muS[1] | 9401.5185 | 1.2985 | 385.7014 | 8658.754 | 9139.7188 | 9396.9651 | 9659.4482 | 10168.1836 | 88225.75 | 1 |
| muS[2] | 9401.5185 | 1.2985 | 385.7014 | 8658.754 | 9139.7188 | 9396.9651 | 9659.4482 | 10168.1836 | 88225.75 | 1 |
| Sigma[1,1] | 5682134.8383 | 6163.1284 | 1833320.0479 | 3084006.300 | 4388400.4801 | 5360435.6307 | 6609031.9027 | 10147366.1800 | 88485.91 | 1 |
| Sigma[1,2] | 0.0000 | NaN | 0.0000 | 0.000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,1] | 0.0000 | NaN | 0.0000 | 0.000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | NaN | NaN |
| Sigma[2,2] | 4479675.7258 | 4979.0985 | 1465407.0596 | 2433121.908 | 3448663.9611 | 4211305.8577 | 5206848.0345 | 8126664.8959 | 86619.39 | 1 |
| lp__ | -498.7636 | 0.0045 | 1.2279 | -501.936 | -499.3242 | -498.4474 | -497.8608 | -497.3629 | 73706.14 | 1 |
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -500.0261
##
## Error Measures:
##
## Relative Mean-Squared Error: 1.384368e-07
## Coefficient of Variation: 0.000372071
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
bf(M1, M0) #Bayes factor
## Estimated Bayes factor in favor of M1 over M0: 0.88387
Do we expect the Bayes factor estimates to vary significantly despite different model specifications?