Using the ttestBF and anovaBF R Functions

============================================================================================================ PREVIOUS: https://rpubs.com/sherloconan/1184363

THIS: https://rpubs.com/sherloconan/1187595

MORE: https://github.com/zhengxiaoUVic/Bayesian

Default functions in “BayesFactor” version 0.9.2 and later return the same Bayes factor estimates for the independent-samples \(t\)-test and one-way analysis of variance (ANOVA) with two conditions.

However, those functions return systematically different estimates for the paired \(t\)-test and one-way repeated-measures ANOVA (RMANOVA) with two conditions (see also discussion and vignettes; Wei, Nathoo, & Masson, 2023, p. 1764).

Simulation

Code

# parameters for data simulation in a within-subject design with two conditions
(params <- data.frame("n"=rep(c(24,48),each=6),
                      "mu_b"=100,
                      "delta_mu"=c(6.7,13.1,28.6,3.8,7.5,16.5,4.6,9.0,20.4,2.7,5.4,12.0),
                      "sigma_b"=20,
                      "sigma_e"=rep(c(6.67,13.10,30.55),4),
                      "rho"=rep(c(0.9,0.7,0.3),4),
                      "power"=rep(rep(c(0.8,0.3),each=3),2))) # Wei, Nathoo, and Masson (2023, Tbl 3)

##     n mu_b delta_mu sigma_b sigma_e rho power
## 1  24  100      6.7      20    6.67 0.9   0.8
## 2  24  100     13.1      20   13.10 0.7   0.8
## 3  24  100     28.6      20   30.55 0.3   0.8
## 4  24  100      3.8      20    6.67 0.9   0.3
## 5  24  100      7.5      20   13.10 0.7   0.3
## 6  24  100     16.5      20   30.55 0.3   0.3
## 7  48  100      4.6      20    6.67 0.9   0.8
## 8  48  100      9.0      20   13.10 0.7   0.8
## 9  48  100     20.4      20   30.55 0.3   0.8
## 10 48  100      2.7      20    6.67 0.9   0.3
## 11 48  100      5.4      20   13.10 0.7   0.3
## 12 48  100     12.0      20   30.55 0.3   0.3

rindex <- 3 # Report indicator (rho = 0.3)

mu_b <- 100; sigma_b <- 20 # subject mean and standard deviation
n <- params$n[rindex] # sample size
delta_mu <- params$delta_mu[rindex] # effect size
sigma_e <- params$sigma_e[rindex] # error standard deviation

NSim <- 10000 # number of simulations
BF_t <- BF_RMANOVA <- BF_err <- logBF_num <- logBF_fast <- numeric(NSim)

for (i in 1:NSim) { #roughly 10 min + 4.5 h of run time
  set.seed(i+NSim*rindex)
  TrueScore <- rnorm(n, mu_b, sigma_b)
  eij1 <- rnorm(n, 0, sigma_e)
  eij2 <- rnorm(n, 0, sigma_e)
  df <- data.frame("Subject"=factor(rep(c(1:n), 2)),
                   "Level"=factor(rep(c("Level1", "Level2"), each=n)),
                   "DV"=c(TrueScore + eij1 + delta_mu,
                          TrueScore + eij2)) # long format
  
  BF_t[i] <- extractBF(ttestBF(df$DV[1:n] - df$DV[(n+1):(2*n)]))$bf
  set.seed(277)
  temp <- anovaBF(DV ~ Level + Subject, whichRandom="Subject", data=df, progress=F)
  BF_RMANOVA[i] <- extractBF(temp)$bf
  BF_err[i] <- extractBF(temp)$error # proportional error estimate on the Bayes factor
  
  fit1_INLA  <- inla(DV ~ 1 + Level + f(Subject, model="iid"), family="gaussian", data=df)
  fit0_INLA  <- inla(DV ~ 1 + f(Subject, model="iid"), family="gaussian", data=df)
  logbf10 <- fit1_INLA$mlik - fit0_INLA$mlik
  logBF_num[i] <- logbf10[1]
  logBF_fast[i] <- logbf10[2]
}

“BayesFactor” version 0.9.12-4.7
“INLA” version 24.05.10 (stable)

Models

Bayesian \(t\)-Test

\[\begin{equation*} \tag{1} y_i\overset{\text{i.i.d.}}{\sim}\mathcal{N}(\sigma_\epsilon \delta,\ \sigma_\epsilon^2)\quad\text{for }i=1,\dotsb,N,\text{ where }\delta=\mu\ /\ \sigma_\epsilon \end{equation*}\]

\[\begin{equation*} \tag{2} \pi(\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]

\[\begin{equation*} \tag{3} \delta\sim\text{Cauchy}(0,\ h)=\frac{h}{(\delta^2+h^2)\pi} \end{equation*}\]

Bayesian Repeated-Measures Analysis of Variance

\[\begin{align*} \tag{4} \mathcal{M}_1:\ Y_{ik}&=\mu+\sigma_\epsilon(b_k+t_i)+\epsilon_{ik}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ik}&=\mu+\sigma_\epsilon b_k+\epsilon_{ik}, \\ \epsilon_{ik}\overset{\text{i.i.d.}}{\sim}&\mathcal{N}(0,\sigma_\epsilon^2)\qquad\text{for subject } k=1,\dotsb,n,\text{ and condition } i=1,\dotsb,a. \end{align*}\]

\[\begin{equation*} \tag{5} \pi(\mu,\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]

\[\begin{equation*} \tag{6} t_i^{\star}\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation*}\]

\[\begin{equation*} \tag{7} b_k\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation*}\]

\[\begin{equation*} \tag{8} (t_1^{\star},\dotsb,t_{a-1}^{\star})=(t_1,\dotsb,t_{a})\cdot\mathbf{Q} \end{equation*}\]

\[\begin{equation*} \tag{9} \mathbf{I}_a-a^{-1}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q^{\top}}, \end{equation*}\]

where \(\mathbf{Q}\) is an \(a\times (a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left side term in Eq. 9. For example, \(t^{\star}=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (give \(t_1+t_2=0\)), \(t_1=t^{\star}/\sqrt{2}\) and \(t_2=-t^{\star}/\sqrt{2}\).

\[\begin{equation*} \tag{10} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation*}\]

\[\begin{equation*} \tag{11} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation*}\]

By default, \(h=0.5\) for the fixed effects and \(h_b=1\) for the random effects.

Bayesian Latent Gaussian Model

In R-INLA (integrated nested Laplace approximation),

\[\begin{align*} \tag{4'} \mathcal{M}_1:\ Y_{ik}&=\mu+s_k+\alpha_i+\epsilon_{ik}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ik}&=\mu+s_k+\epsilon_{ik}, \\ \epsilon_{ik}\overset{\text{i.i.d.}}{\sim}&\mathcal{N}(0,\sigma_\epsilon^2)\qquad\text{for subject } k=1,\dotsb,n,\text{ and condition } i=1,\dotsb,a. \end{align*}\]

\[\begin{equation*} \tag{12} \mu\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}+\infty) \end{equation*}\]

\[\begin{equation*} \tag{13} \alpha_i\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}0.001^{-1}) \end{equation*}\]

\[\begin{equation*} \tag{14} s_k\mid g_s\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}g_s) \end{equation*}\]

\[\begin{equation*} \tag{15} g_s^{-1}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005) \end{equation*}\]

\[\begin{equation*} \tag{16} \sigma_\epsilon^{-2}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005) \end{equation*}\]

Equivalently, \(\sigma_\epsilon^2,\ g_s\sim\text{Inv-Gamma}(1,\ \color{gray}{\text{scale = }}0.00005)\). Or, \(\sigma_\epsilon^2,\ g_s\sim\text{Scale-inv-}\chi^2(2,\ \color{gray}{h^2=}0.00005)\).

Results

\(\rho=0.3,\ n=24\)

anovaBF versus ttestBF

result <- data.frame(BF_t, BF_RMANOVA, BF_err, "logBF_t"=log(BF_t), "logBF_RMANOVA"=log(BF_RMANOVA),
                     logBF_num, logBF_fast)
result$ttestBF <- factor(ifelse(result$BF_t > 1/3, 
                                ifelse(result$BF_t > 3, "Favor H1", "Inconclusive"), "Favor H0"))
result$`ttestBF > anovaBF` <- factor(ifelse(result$BF_t > result$BF_RMANOVA, "Yes", "No"))

range(result$BF_err) # range of the proportional error estimate on BF_RMANOVA

## [1] 0.006606263 0.008822883

fig <- ggplot(subset(result, BF_err<0.01), aes(logBF_t, logBF_RMANOVA))+
    geom_point(aes(color=ttestBF, shape=`ttestBF > anovaBF`), size=0.5, alpha=.45)+
    scale_color_manual(values=c("#E69F00", "#56B4E9", "#999999"))+
    lims(x=range(result[4:5]), y=range(result[4:5]))+
    geom_abline(slope=1, color="gray")+
    geom_hline(yintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
    geom_vline(xintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
    labs(x="Log Bayes Factor for an Effect from ttestBF", y="Log Bayes Factor for an Effect from anovaBF",
         title=paste0("Within-Subject Design, Report ",rindex,
                      ": Sample Size ",n,", Correlation ",params$rho[rindex]))+
    theme_classic()+
    guides(color=guide_legend(override.aes=list(size=5, alpha=1)),
           shape=guide_legend(override.aes=list(size=5, alpha=1)))
# ggsave("~/Desktop/t.to.RMANOVA.png", fig, width=1800, height=1400, units="px", device="png", dpi=300)

ggplotly(fig)

Data table

data <- result
data[sapply(data, is.numeric)] <- sapply(data[sapply(data, is.numeric)], round, 2)
datatable(data, filter="top")

anovaBF versus inla

result$anovaBF <- factor(ifelse(result$BF_RMANOVA > 1/3, 
                                ifelse(result$BF_RMANOVA > 3, "Favor H1", "Inconclusive"), "Favor H0"))
result$`INLA > anovaBF` <- factor(ifelse(result$logBF_num > result$logBF_RMANOVA, "Yes", "No"))
# Also try logBF_fast (less accurate estimate)

fig2 <- ggplot(subset(result, BF_err<0.01), aes(logBF_num, logBF_RMANOVA))+
    geom_point(aes(color=anovaBF, shape=`INLA > anovaBF`), size=0.5, alpha=.45)+
    scale_color_manual(values=c("#E69F00", "#56B4E9", "#999999"))+
    lims(x=range(result[c("logBF_RMANOVA","logBF_num")]), y=range(result[c("logBF_RMANOVA","logBF_num")]))+
    geom_abline(slope=1, color="gray")+
    geom_hline(yintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
    geom_vline(xintercept=log(c(1/3, 3)), linetype=c("dashed", "solid"), color=c("blue","red"))+
    labs(x="Log Bayes Factor for an Effect from INLA", y="Log Bayes Factor for an Effect from anovaBF",
         title=paste0("Within-Subject Design, Report ",rindex,
                      ": Sample Size ",n,", Correlation ",params$rho[rindex]))+
    theme_classic()+
    guides(color=guide_legend(override.aes=list(size=5, alpha=1)),
           shape=guide_legend(override.aes=list(size=5, alpha=1)))
# ggsave("~/Desktop/INLA.to.RMANOVA.png", fig2, width=1800, height=1400, units="px", device="png", dpi=300)

ggplotly(fig2)

Investigating a simulated data set

i <- 1773
set.seed(i+NSim*rindex)
TrueScore <- rnorm(n, mu_b, sigma_b)
eij1 <- rnorm(n, 0, sigma_e)
eij2 <- rnorm(n, 0, sigma_e)
df <- data.frame("Subject"=factor(rep(c(1:n), 2)),
                 "Level"=factor(rep(c("Level1", "Level2"), each=n)),
                 "DV"=c(TrueScore + eij1 + delta_mu,
                        TrueScore + eij2)) # long format
set.seed(277)
anovaBF(DV ~ Level + Subject, whichRandom="Subject", data=df, progress=F)

## Bayes factor analysis
## --------------
## [1] Level + Subject : 1427.973 ±0.78%
## 
## Against denominator:
##   DV ~ Subject 
## ---
## Bayes factor type: BFlinearModel, JZS

fit1_INLA  <- inla(DV ~ 1 + Level + f(Subject, model="iid"), family="gaussian", data=df)
fit0_INLA  <- inla(DV ~ 1 + f(Subject, model="iid"), family="gaussian", data=df)
fit1_INLA$mlik - fit0_INLA$mlik

##                                            [,1]
## log marginal-likelihood (integration) -5.653821
## log marginal-likelihood (Gaussian)    -5.684411

cat("True error precision is",round(1/(sigma_e)^2,4),"and true subject precision is",round(1/(sigma_b)^2,4))

## True error precision is 0.0011 and true subject precision is 0.0025

summary(fit1_INLA)

## Time used:
##     Pre = 0.678, Running = 0.547, Post = 0.0106, Total = 1.24 
## Fixed effects:
##                mean    sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept) 137.272 7.667    122.151  137.277    152.365 137.277   0
## LevelLevel2 -35.305 6.690    -48.324  -35.372    -21.887 -35.366   0
## 
## Random effects:
##   Name     Model
##     Subject IID model
## 
## Model hyperparameters:
##                                          mean    sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.002 0.001      0.001    0.002
## Precision for Subject                   0.001 0.001      0.001    0.001
##                                         0.975quant  mode
## Precision for the Gaussian observations      0.003 0.002
## Precision for Subject                        0.003 0.001
## 
## Marginal log-Likelihood:  -268.55 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

summary(fit0_INLA)

## Time used:
##     Pre = 0.571, Running = 0.177, Post = 0.0066, Total = 0.754 
## Fixed effects:
##               mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) 119.62 6.085    107.646   119.62    131.593 119.62   0
## 
## Random effects:
##   Name     Model
##     Subject IID model
## 
## Model hyperparameters:
##                                            mean       sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.0e-03     0.00       0.00 1.00e-03
## Precision for Subject                   2.2e+04 24155.35    1470.27 1.45e+04
##                                         0.975quant     mode
## Precision for the Gaussian observations   1.00e-03    0.001
## Precision for Subject                     8.62e+04 4009.065
## 
## Marginal log-Likelihood:  -262.86 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

mlik: log marginal likelihood, \(\ln\int_{\boldsymbol{\theta}\in\boldsymbol{\Theta}_i}p(\boldsymbol{y}\mid \boldsymbol{\theta},\mathcal{M}_i)\cdot \pi(\boldsymbol{\theta}\mid \mathcal{M}_i)\ \!\text{d}\boldsymbol{\theta}\)

Data \(\times100\)

Scale invariance. The value of the Bayes factor is unaffected by multiplicative changes in the unit of measure of the observations. For instance, if observations are in a unit of length, the Bayes factor is the same whether the measurement is in nanometers or light-years (Rouder et al., 2012, p. 360).

Learn more about scaling effects https://becarioprecario.bitbucket.io/inla-gitbook/ch-priors.html#sec:scalingpriors.

df$DV100 <- df$DV * 100 # Data × 100
set.seed(277)
anovaBF(DV100 ~ Level + Subject, whichRandom="Subject", data=df, progress=F) # same

## Bayes factor analysis
## --------------
## [1] Level + Subject : 1427.973 ±0.78%
## 
## Against denominator:
##   DV100 ~ Subject 
## ---
## Bayes factor type: BFlinearModel, JZS

# Option 'scale.model' is not used for model: iid
M1_INLA  <- inla(DV100 ~ 1 + Level + f(Subject, model="iid"), family="gaussian", data=df)
M0_INLA  <- inla(DV100 ~ 1 + f(Subject, model="iid"), family="gaussian", data=df)
M1_INLA$mlik - M0_INLA$mlik # not the same

##                                               [,1]
## log marginal-likelihood (integration) 0.0029725711
## log marginal-likelihood (Gaussian)    0.0004664809

summary(M1_INLA)

## Time used:
##     Pre = 0.514, Running = 0.18, Post = 0.00654, Total = 0.7 
## Fixed effects:
##                  mean      sd 0.025quant  0.5quant 0.975quant      mode kld
## (Intercept) 11963.259 608.551  10765.706 11963.281  13160.686 11963.281   0
## LevelLevel2    -2.587  31.615    -64.581    -2.587     59.407    -2.587   0
## 
## Random effects:
##   Name     Model
##     Subject IID model
## 
## Model hyperparameters:
##                                             mean       sd 0.025quant 0.5quant
## Precision for the Gaussian observations     0.00     0.00       0.00     0.00
## Precision for Subject                   22046.79 24199.68    1470.08 14460.56
##                                         0.975quant    mode
## Precision for the Gaussian observations       0.00    0.00
## Precision for Subject                     86296.93 4007.72
## 
## Marginal log-Likelihood:  -488.51 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

summary(M0_INLA)

## Time used:
##     Pre = 0.497, Running = 0.177, Post = 0.00621, Total = 0.679 
## Fixed effects:
##                 mean      sd 0.025quant 0.5quant 0.975quant     mode kld
## (Intercept) 11961.97 608.395   10764.78 11961.97   13159.15 11961.97   0
## 
## Random effects:
##   Name     Model
##     Subject IID model
## 
## Model hyperparameters:
##                                             mean       sd 0.025quant 0.5quant
## Precision for the Gaussian observations     0.00     0.00       0.00     0.00
## Precision for Subject                   22053.75 24279.53    1455.61 14433.79
##                                         0.975quant    mode
## Precision for the Gaussian observations       0.00    0.00
## Precision for Subject                     86511.89 3960.73
## 
## Marginal log-Likelihood:  -488.51 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

scroll to top