============================================================================================================ PREVIOUS: https://rpubs.com/sherloconan/1184363
THIS: https://rpubs.com/sherloconan/1187595
MORE: https://github.com/zhengxiaoUVic/Bayesian
Default functions in “BayesFactor” version 0.9.2 and later return the same Bayes factor estimates for the independent-samples \(t\)-test and one-way analysis of variance (ANOVA) with two conditions.
However, those functions return systematically different estimates for the paired \(t\)-test and one-way repeated-measures ANOVA (RMANOVA) with two conditions (see also discussion and vignettes; Wei, Nathoo, & Masson, 2023, p. 1764).
# parameters for data simulation in a within-subject design with two conditions
(params <- data.frame("n"=rep(c(24,48),each=6),
"mu_b"=100,
"delta_mu"=c(6.7,13.1,28.6,3.8,7.5,16.5,4.6,9.0,20.4,2.7,5.4,12.0),
"sigma_b"=20,
"sigma_e"=rep(c(6.67,13.10,30.55),4),
"rho"=rep(c(0.9,0.7,0.3),4),
"power"=rep(rep(c(0.8,0.3),each=3),2))) # Wei, Nathoo, and Masson (2023, Tbl 3)
## n mu_b delta_mu sigma_b sigma_e rho power
## 1 24 100 6.7 20 6.67 0.9 0.8
## 2 24 100 13.1 20 13.10 0.7 0.8
## 3 24 100 28.6 20 30.55 0.3 0.8
## 4 24 100 3.8 20 6.67 0.9 0.3
## 5 24 100 7.5 20 13.10 0.7 0.3
## 6 24 100 16.5 20 30.55 0.3 0.3
## 7 48 100 4.6 20 6.67 0.9 0.8
## 8 48 100 9.0 20 13.10 0.7 0.8
## 9 48 100 20.4 20 30.55 0.3 0.8
## 10 48 100 2.7 20 6.67 0.9 0.3
## 11 48 100 5.4 20 13.10 0.7 0.3
## 12 48 100 12.0 20 30.55 0.3 0.3
rindex <- 3 # Report indicator (rho = 0.3)
mu_b <- 100; sigma_b <- 20 # subject mean and standard deviation
n <- params$n[rindex] # sample size
delta_mu <- params$delta_mu[rindex] # effect size
sigma_e <- params$sigma_e[rindex] # error standard deviation
NSim <- 10000 # number of simulations
BF_t <- BF_RMANOVA <- BF_err <- logBF_num <- logBF_fast <- numeric(NSim)
for (i in 1:NSim) { #roughly 10 min + 4.5 h of run time
set.seed(i+NSim*rindex)
TrueScore <- rnorm(n, mu_b, sigma_b)
eij1 <- rnorm(n, 0, sigma_e)
eij2 <- rnorm(n, 0, sigma_e)
df <- data.frame("Subject"=factor(rep(c(1:n), 2)),
"Level"=factor(rep(c("Level1", "Level2"), each=n)),
"DV"=c(TrueScore + eij1 + delta_mu,
TrueScore + eij2)) # long format
BF_t[i] <- extractBF(ttestBF(df$DV[1:n] - df$DV[(n+1):(2*n)]))$bf
set.seed(277)
temp <- anovaBF(DV ~ Level + Subject, whichRandom="Subject", data=df, progress=F)
BF_RMANOVA[i] <- extractBF(temp)$bf
BF_err[i] <- extractBF(temp)$error # proportional error estimate on the Bayes factor
fit1_INLA <- inla(DV ~ 1 + Level + f(Subject, model="iid"), family="gaussian", data=df)
fit0_INLA <- inla(DV ~ 1 + f(Subject, model="iid"), family="gaussian", data=df)
logbf10 <- fit1_INLA$mlik - fit0_INLA$mlik
logBF_num[i] <- logbf10[1]
logBF_fast[i] <- logbf10[2]
}
“BayesFactor” version 0.9.12-4.7
“INLA”
version 24.05.10 (stable)
\[\begin{equation*} \tag{1} y_i\overset{\text{i.i.d.}}{\sim}\mathcal{N}(\sigma_\epsilon \delta,\ \sigma_\epsilon^2)\quad\text{for }i=1,\dotsb,N,\text{ where }\delta=\mu\ /\ \sigma_\epsilon \end{equation*}\]
\[\begin{equation*} \tag{2} \pi(\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]
\[\begin{equation*} \tag{3} \delta\sim\text{Cauchy}(0,\ h)=\frac{h}{(\delta^2+h^2)\pi} \end{equation*}\]
\[\begin{align*} \tag{4} \mathcal{M}_1:\ Y_{ik}&=\mu+\sigma_\epsilon(b_k+t_i)+\epsilon_{ik}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ik}&=\mu+\sigma_\epsilon b_k+\epsilon_{ik}, \\ \epsilon_{ik}\overset{\text{i.i.d.}}{\sim}&\mathcal{N}(0,\sigma_\epsilon^2)\qquad\text{for subject } k=1,\dotsb,n,\text{ and condition } i=1,\dotsb,a. \end{align*}\]
\[\begin{equation*} \tag{5} \pi(\mu,\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]
\[\begin{equation*} \tag{6} t_i^{\star}\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation*}\]
\[\begin{equation*} \tag{7} b_k\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation*}\]
\[\begin{equation*} \tag{8} (t_1^{\star},\dotsb,t_{a-1}^{\star})=(t_1,\dotsb,t_{a})\cdot\mathbf{Q} \end{equation*}\]
\[\begin{equation*} \tag{9} \mathbf{I}_a-a^{-1}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q^{\top}}, \end{equation*}\]
where \(\mathbf{Q}\) is an \(a\times (a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in Eq. 9. For example, \(t^{\star}=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (give \(t_1+t_2=0\)), \(t_1=t^{\star}/\sqrt{2}\) and \(t_2=-t^{\star}/\sqrt{2}\).
\[\begin{equation*} \tag{10} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation*}\]
\[\begin{equation*} \tag{11} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation*}\]
By default, \(h=0.5\) for the fixed effects and \(h_b=1\) for the random effects.
In R-INLA (integrated nested Laplace approximation),
\[\begin{align*} \tag{4'} \mathcal{M}_1:\ Y_{ik}&=\mu+s_k+\alpha_i+\epsilon_{ik}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ik}&=\mu+s_k+\epsilon_{ik}, \\ \epsilon_{ik}\overset{\text{i.i.d.}}{\sim}&\mathcal{N}(0,\sigma_\epsilon^2)\qquad\text{for subject } k=1,\dotsb,n,\text{ and condition } i=1,\dotsb,a. \end{align*}\]
\[\begin{equation*} \tag{12} \mu\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}+\infty) \end{equation*}\]
\[\begin{equation*} \tag{13} \alpha_i\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}0.001^{-1}) \end{equation*}\]
\[\begin{equation*} \tag{14} s_k\mid g_s\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}g_s) \end{equation*}\]
\[\begin{equation*} \tag{15} g_s^{-1}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005) \end{equation*}\]
\[\begin{equation*} \tag{16} \sigma_\epsilon^{-2}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005) \end{equation*}\]
Equivalently, \(\sigma_\epsilon^2,\ g_s\sim\text{Inv-Gamma}(1,\ \color{gray}{\text{scale = }}0.00005)\). Or, \(\sigma_\epsilon^2,\ g_s\sim\text{Scale-inv-}\chi^2(2,\ \color{gray}{h^2=}0.00005)\).
Read more.
anovaBF versus ttestBF
result <- data.frame(BF_t, BF_RMANOVA, BF_err, "logBF_t"=log(BF_t), "logBF_RMANOVA"=log(BF_RMANOVA),
logBF_num, logBF_fast)
result$ttestBF <- factor(ifelse(result$BF_t > 1/3,
ifelse(result$BF_t > 3, "Favor H1", "Inconclusive"), "Favor H0"))
result$`ttestBF > anovaBF` <- factor(ifelse(result$BF_t > result$BF_RMANOVA, "Yes", "No"))
range(result$BF_err) # range of the proportional error estimate on BF_RMANOVA
## [1] 0.006606263 0.008822883
fig <- ggplot(subset(result, BF_err<0.01), aes(logBF_t, logBF_RMANOVA))+
geom_point(aes(color=ttestBF, shape=`ttestBF > anovaBF`), size=0.5, alpha=.45)+
scale_color_manual(values=c("#E69F00", "#56B4E9", "#999999"))+
lims(x=range(result[4:5]), y=range(result[4:5]))+
geom_abline(slope=1, color="gray")+
geom_hline(yintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
geom_vline(xintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
labs(x="Log Bayes Factor for an Effect from ttestBF", y="Log Bayes Factor for an Effect from anovaBF",
title=paste0("Within-Subject Design, Report ",rindex,
": Sample Size ",n,", Correlation ",params$rho[rindex]))+
theme_classic()+
guides(color=guide_legend(override.aes=list(size=5, alpha=1)),
shape=guide_legend(override.aes=list(size=5, alpha=1)))
# ggsave("~/Desktop/t.to.RMANOVA.png", fig, width=1800, height=1400, units="px", device="png", dpi=300)
ggplotly(fig)
Data table
data <- result
data[sapply(data, is.numeric)] <- sapply(data[sapply(data, is.numeric)], round, 2)
datatable(data, filter="top")
anovaBF versus inla
result$anovaBF <- factor(ifelse(result$BF_RMANOVA > 1/3,
ifelse(result$BF_RMANOVA > 3, "Favor H1", "Inconclusive"), "Favor H0"))
result$`INLA > anovaBF` <- factor(ifelse(result$logBF_num > result$logBF_RMANOVA, "Yes", "No"))
# Also try logBF_fast (less accurate estimate)
fig2 <- ggplot(subset(result, BF_err<0.01), aes(logBF_num, logBF_RMANOVA))+
geom_point(aes(color=anovaBF, shape=`INLA > anovaBF`), size=0.5, alpha=.45)+
scale_color_manual(values=c("#E69F00", "#56B4E9", "#999999"))+
lims(x=range(result[c("logBF_RMANOVA","logBF_num")]), y=range(result[c("logBF_RMANOVA","logBF_num")]))+
geom_abline(slope=1, color="gray")+
geom_hline(yintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
geom_vline(xintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
labs(x="Log Bayes Factor for an Effect from INLA", y="Log Bayes Factor for an Effect from anovaBF",
title=paste0("Within-Subject Design, Report ",rindex,
": Sample Size ",n,", Correlation ",params$rho[rindex]))+
theme_classic()+
guides(color=guide_legend(override.aes=list(size=5, alpha=1)),
shape=guide_legend(override.aes=list(size=5, alpha=1)))
# ggsave("~/Desktop/INLA.to.RMANOVA.png", fig2, width=1800, height=1400, units="px", device="png", dpi=300)
ggplotly(fig2)
Investigating a simulated data set
i <- 1773
set.seed(i+NSim*rindex)
TrueScore <- rnorm(n, mu_b, sigma_b)
eij1 <- rnorm(n, 0, sigma_e)
eij2 <- rnorm(n, 0, sigma_e)
df <- data.frame("Subject"=factor(rep(c(1:n), 2)),
"Level"=factor(rep(c("Level1", "Level2"), each=n)),
"DV"=c(TrueScore + eij1 + delta_mu,
TrueScore + eij2)) # long format
set.seed(277)
anovaBF(DV ~ Level + Subject, whichRandom="Subject", data=df, progress=F)
## Bayes factor analysis
## --------------
## [1] Level + Subject : 1427.973 ±0.78%
##
## Against denominator:
## DV ~ Subject
## ---
## Bayes factor type: BFlinearModel, JZS
fit1_INLA <- inla(DV ~ 1 + Level + f(Subject, model="iid"), family="gaussian", data=df)
fit0_INLA <- inla(DV ~ 1 + f(Subject, model="iid"), family="gaussian", data=df)
fit1_INLA$mlik - fit0_INLA$mlik
## [,1]
## log marginal-likelihood (integration) -5.653821
## log marginal-likelihood (Gaussian) -5.684411
cat("True error precision is",round(1/(sigma_e)^2,4),"and true subject precision is",round(1/(sigma_b)^2,4))
## True error precision is 0.0011 and true subject precision is 0.0025
summary(fit1_INLA)
## Time used:
## Pre = 0.678, Running = 0.547, Post = 0.0106, Total = 1.24
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 137.272 7.667 122.151 137.277 152.365 137.277 0
## LevelLevel2 -35.305 6.690 -48.324 -35.372 -21.887 -35.366 0
##
## Random effects:
## Name Model
## Subject IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.002 0.001 0.001 0.002
## Precision for Subject 0.001 0.001 0.001 0.001
## 0.975quant mode
## Precision for the Gaussian observations 0.003 0.002
## Precision for Subject 0.003 0.001
##
## Marginal log-Likelihood: -268.55
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
summary(fit0_INLA)
## Time used:
## Pre = 0.571, Running = 0.177, Post = 0.0066, Total = 0.754
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 119.62 6.085 107.646 119.62 131.593 119.62 0
##
## Random effects:
## Name Model
## Subject IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.0e-03 0.00 0.00 1.00e-03
## Precision for Subject 2.2e+04 24155.35 1470.27 1.45e+04
## 0.975quant mode
## Precision for the Gaussian observations 1.00e-03 0.001
## Precision for Subject 8.62e+04 4009.065
##
## Marginal log-Likelihood: -262.86
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
mlik: log marginal likelihood, \(\ln\int_{\boldsymbol{\theta}\in\boldsymbol{\Theta}_i}p(\boldsymbol{y}\mid
\boldsymbol{\theta},\mathcal{M}_i)\cdot \pi(\boldsymbol{\theta}\mid
\mathcal{M}_i)\ \!\text{d}\boldsymbol{\theta}\)
Data \(\times100\)
Scale invariance. The value of the Bayes factor is unaffected by multiplicative changes in the unit of measure of the observations. For instance, if observations are in a unit of length, the Bayes factor is the same whether the measurement is in nanometers or light-years (Rouder et al., 2012, p. 360).
Learn more about scaling effects https://becarioprecario.bitbucket.io/inla-gitbook/ch-priors.html#sec:scalingpriors.
df$DV100 <- df$DV * 100 # Data × 100
set.seed(277)
anovaBF(DV100 ~ Level + Subject, whichRandom="Subject", data=df, progress=F) # same
## Bayes factor analysis
## --------------
## [1] Level + Subject : 1427.973 ±0.78%
##
## Against denominator:
## DV100 ~ Subject
## ---
## Bayes factor type: BFlinearModel, JZS
# Option 'scale.model' is not used for model: iid
M1_INLA <- inla(DV100 ~ 1 + Level + f(Subject, model="iid"), family="gaussian", data=df)
M0_INLA <- inla(DV100 ~ 1 + f(Subject, model="iid"), family="gaussian", data=df)
M1_INLA$mlik - M0_INLA$mlik # not the same
## [,1]
## log marginal-likelihood (integration) 0.0029725711
## log marginal-likelihood (Gaussian) 0.0004664809
summary(M1_INLA)
## Time used:
## Pre = 0.514, Running = 0.18, Post = 0.00654, Total = 0.7
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 11963.259 608.551 10765.706 11963.281 13160.686 11963.281 0
## LevelLevel2 -2.587 31.615 -64.581 -2.587 59.407 -2.587 0
##
## Random effects:
## Name Model
## Subject IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.00 0.00 0.00 0.00
## Precision for Subject 22046.79 24199.68 1470.08 14460.56
## 0.975quant mode
## Precision for the Gaussian observations 0.00 0.00
## Precision for Subject 86296.93 4007.72
##
## Marginal log-Likelihood: -488.51
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
summary(M0_INLA)
## Time used:
## Pre = 0.497, Running = 0.177, Post = 0.00621, Total = 0.679
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 11961.97 608.395 10764.78 11961.97 13159.15 11961.97 0
##
## Random effects:
## Name Model
## Subject IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.00 0.00 0.00 0.00
## Precision for Subject 22053.75 24279.53 1455.61 14433.79
## 0.975quant mode
## Precision for the Gaussian observations 0.00 0.00
## Precision for Subject 86511.89 3960.73
##
## Marginal log-Likelihood: -488.51
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
.RData: https://www.kaggle.com/datasets/sherloconan/using-the-ttestbf-anovabf-and-inla-r-functions/
t.to.RMANOVA-r3.RData
t.to.RMANOVA-r1.RData