PREVIOUS: https://rpubs.com/sherloconan/1151407

THIS: https://rpubs.com/sherloconan/1184363

MORE: https://github.com/zhengxiaoUVic/Bayesian

   

Model
(for Aggregated Data, i.e., without the Highest-Order Repeated-Measures Interaction)

\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ik}&=\mu+s_k+\alpha_i+\epsilon_{ik}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ik}&=\mu+s_k+\epsilon_{ik}, \\ \epsilon_{ik}\overset{\text{i.i.d.}}{\sim}&\mathcal{N}(0,\sigma_\epsilon^2)\qquad\text{for subject } k=1,\dotsb,n,\text{ and condition } i=1,\dotsb,a. \end{align*}\]

A general matrix-vector model expression is given in Eq. 13 of the “R-INLA” section.

Data
sleep: The length of increased sleep (in hours) after taking two drugs when compared to regular nights where no drug was administered.

A data frame with 20 observations on three variables.

group1 <- sleep$extra[1:10]; group2 <- sleep$extra[11:20]; resp_diff <- group1 - group2; N <- length(resp_diff)
library(ggplot2) # not attached by the packages in the R Scripts section; needed for the plot
ggplot(sleep, aes(x=group, y=extra)) +
    geom_line(aes(group=ID), linewidth=.25) + # plot individual subjects' trajectories
    labs(x="Condition", y="Response (hour)") + theme_classic()

Analyses

\(p\)-value = 0.002833 and \(\mathit{BF}_{10}\) estimates by method:

ttestBF                  17.25888                                                       Eq. 2-4
anovaBF                  11.4676                                                        Eq. 1 & 6-12
ttest.tstat (approx.)    17.25888                                                       Eq. 5
BIC (approx.)            40.83649                                                       Eq. 16
bridge_sampler           \(\exp\left\{-42.47931-(-44.93969)\right\}\approx11.70927\)    Eq. 1 & 6-12
inla                     \(\exp\left\{-59.42-(-60.40)\right\}\approx\) 2.661885         Eq. 13-14

Note the impact of different model specifications (priors).

Paired \(t\)-Test

 

Paired or One-Sample \(t\)-Test

 

# t.test(resp_diff)
# t.test(extra ~ group, data=sleep, paired=T) # deprecated in R 4.4.0
(fit_t <- t.test(group1, group2, paired=T))
## 
##  Paired t-test
## 
## data:  group1 and group2
## t = -4.0621, df = 9, p-value = 0.002833
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -2.4598858 -0.7001142
## sample estimates:
## mean difference 
##           -1.58
BayesFactor::ttest.tstat(t=fit_t$statistic, n1=N, simple=T) # convert t-statistic to BF10
##      B10 
## 17.25888

Note: when simple=F (default), the function returns \(\ln\mathit{BF}_{10}\).

 


   

Nonparametric: One-Sample Wilcoxon Signed-Rank Test

 

shapiro.test(resp_diff) # check the normality assumption
## 
##  Shapiro-Wilk normality test
## 
## data:  resp_diff
## W = 0.82987, p-value = 0.03334
wilcox.test(resp_diff, mu=0, alternative="two.sided")
## Warning in wilcox.test.default(resp_diff, mu = 0, alternative = "two.sided"):
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(resp_diff, mu = 0, alternative = "two.sided"):
## cannot compute exact p-value with zeroes
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  resp_diff
## V = 0, p-value = 0.009091
## alternative hypothesis: true location is not equal to 0

 

The test determines whether the median of a single sample (here, the paired differences) differs significantly from a specified value. It is especially useful when the data do not meet the assumptions of the \(t\)-test, such as normality (which the Shapiro-Wilk test above rejects at the .05 level).
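For intuition, the \(V\) statistic can be reproduced by hand from its definition (a minimal sketch; as the warnings above note, wilcox.test drops zero differences and averages tied ranks):

d <- resp_diff[resp_diff != 0] # drop the single zero difference
sum(rank(abs(d))[d > 0])       # V = sum of ranks of the positive differences; 0 here, matching the output above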

 


   

Nonparametric: Paired Permutation Test

 

n_perm <- 2000 # number of permutations
mean_diff <- numeric(n_perm)

for (i in 1:n_perm) {
  # randomly switch the group labels and then compute the mean difference
  set.seed(i)
  mean_diff[i] <- mean(resp_diff * sample(c(-1,1), N, replace=T))
}

hist(mean_diff, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
     main=paste0(n_perm," Permutations of Raw Data"))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff), col="red", lwd=2)

sum(abs(mean_diff) >= abs(mean(resp_diff))) / n_perm # estimated p-value
## [1] 0.003
mean_diff_exact <- colMeans(apply(expand.grid(rep(list(c(-1, 1)), N)),
                                  1, function(signs) resp_diff * signs))

hist(mean_diff_exact, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
     main=substitute(2^exp * " Unique Permutations of Raw Data", list(exp=N)))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff_exact), col="red", lwd=2)

sum(abs(mean_diff_exact) >= abs(mean(resp_diff))) / (2^N) # exact p-value
## [1] 0.00390625

   

Further reading:

May, R. B., Masson, M. E. J., & Hunter, M. A. (1989). Randomization tests: Viable alternatives to normal curve tests. Behavior Research Methods, Instruments & Computers, 21, 482–483. https://doi.org/10.3758/BF03202823

 


   

RMANOVA

 

Repeated-Measures (Within-Subject) Analysis of Variance (RMANOVA)

\(\sum_{i=1}^a\alpha_i=0\) and \(s_k\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_s^2)\).

 

A specific form of two-factor ANOVA, where one factor is the subjects (random effect) and the other is the repeated measure (fixed effect), can be equivalent to one-way RMANOVA. They are also modeled and coded similarly to a one-way ANOVA with blocking in a randomized complete block design, though their interpretations may differ.

 

Nuisance factor: It affects the response, but we are not interested in it. For example, batches of raw materials. Sometimes, several sources of variability are combined in a block, so the block becomes an aggregate variable (Montgomery, 2019, ch. 4).

  • unknown and uncontrollable (“lurking”) - randomization

  • known and uncontrollable - analysis of covariance (ANCOVA)

  • known and controllable - blocking

 

(fit_RM <- summary(aov(extra ~ group + Error(ID/group), sleep))) # one-way RMANOVA
## 
## Error: ID
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  9  58.08   6.453               
## 
## Error: ID:group
##           Df Sum Sq Mean Sq F value  Pr(>F)   
## group      1 12.482  12.482    16.5 0.00283 **
## Residuals  9  6.808   0.756                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(extra ~ group + ID, sleep)) # two-factor ANOVA
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## group        1  12.48  12.482  16.501 0.00283 **
## ID           9  58.08   6.453   8.531 0.00190 **
## Residuals    9   6.81   0.756                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ez::ezANOVA(sleep, dv=extra, wid=ID, within=.(group), type=2, detailed=T, return_aov=T)
## $ANOVA
##        Effect DFn DFd    SSn    SSd         F          p p<.05       ges
## 1 (Intercept)   1   9 47.432 58.078  7.350253 0.02395294     * 0.4223010
## 2       group   1   9 12.482  6.808 16.500881 0.00283289     * 0.1613329
## 
## $aov
## 
## Call:
## aov(formula = formula(aov_formula), data = data)
## 
## Grand Mean: 1.54
## 
## Stratum 1: ID
## 
## Terms:
##                 Residuals
## Sum of Squares     58.078
## Deg. of Freedom         9
## 
## Residual standard error: 2.540297
## 
## Stratum 2: ID:group
## 
## Terms:
##                  group Residuals
## Sum of Squares  12.482     6.808
## Deg. of Freedom      1         9
## 
## Residual standard error: 0.8697381
## Estimated effects are balanced
fit_t$statistic^2 == unlist(fit_RM)["Error: ID:group.F value1"]
##    t 
## TRUE

\(\Longrightarrow\quad t^2=F\), and this result also holds for the (possibly unbalanced) two-sample \(t\)-test with equal variances \(\underline{\text{and}}\) the one-way ANOVA with two conditions.

 

## NOTE: the toy data set and two-sample t-tests with equal variances
ttest_result <- t.test(extra ~ group, sleep, var.equal=T)
anova_result <- summary(aov(extra ~ group, sleep))               #balanced, n=10
options(digits=10)
ttest_result$statistic^2 #3.462626761
anova_result[[1]][["F value"]][1] #3.462626761

ttest_result <- t.test(extra ~ group, sleep[-5,], var.equal=T)
anova_result <- summary(aov(extra ~ group, sleep[-5,]))              #unbalanced
ttest_result$statistic^2 #2.773136741
anova_result[[1]][["F value"]][1] #2.773136741

 


   

Nonparametric: Friedman One-Way RMANOVA by Ranks

 

friedman.test(extra ~ group | ID, sleep)
## 
##  Friedman rank sum test
## 
## data:  extra and group and ID
## Friedman chi-squared = 9, df = 1, p-value = 0.0027

 


   

LMER

 

Linear Mixed-Effects Regression (LMER)
in Sum Coding (i.e., contrasts each level with the grand mean)

\[\begin{equation*} \tag{1'} \mathbf{y}=\mu\mathbf{1}+\mathbf{Z}_s\boldsymbol{s}+\mathbf{X}_{\alpha}\boldsymbol{\alpha}+\boldsymbol{\epsilon},\quad\boldsymbol{\epsilon}\sim\boldsymbol{\mathcal{N}}_{an}(\boldsymbol{0},\ \sigma_\epsilon^2\!\cdot\!\mathbf{I}) \end{equation*}\]

\(\mathbf{Z}_s=\underset{\tiny\begin{array}{r} \text{Kronecker} \\ \text{product} \end{array}}{\mathbf{1}_{a}\otimes\mathbf{I}_n}\), \(\quad\ \mathbf{X}_{\alpha}=\mathbf{I}_a\otimes\mathbf{1}_{n}\),

\(Y_{ik}\), the response for subject \(k\) under condition \(i\). Note that changing the order of subscripts (\(Y_{ki}\)) requires adjusting the matrix operations.

 

\(\widehat{Y}_i=1.54+0.79\times\mathrm{Condition}_i\)    for \(\mathrm{Condition}_1=-1\) and \(\mathrm{Condition}_2=1\).

\(\widehat{Y}_i=0.75+1.58\times\mathrm{Condition}_i\)    for \(\mathrm{Condition}_1=0\) and \(\mathrm{Condition}_2=1\).

In dummy coding, each level of a categorical variable (except the reference level) is represented by a separate binary (0/1) variable.
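As a quick check of the two parameterizations, base R's contrast functions can be inspected and passed to the fit (a sketch; note that contr.sum codes the first level as +1, so the slope sign flips relative to the \((-1,\ 1)\) coding written above):

contr.treatment(2) # dummy coding: a 0/1 indicator for level 2
contr.sum(2)       # sum coding: level 1 -> +1, level 2 -> -1

# Refit under sum coding: intercept ~ 1.54 (grand mean), slope ~ -0.79
coef(summary(lme4::lmer(extra ~ group + (1 | ID), sleep, REML=F,
                        contrasts=list(group="contr.sum"))))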

 

# Random intercept for each subject
summary(lme4::lmer(extra ~ group + (1 | ID), sleep, REML=F))
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: extra ~ group + (1 | ID)
##    Data: sleep
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##      78.5      82.5     -35.3      70.5        16 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.72210 -0.36005  0.03527  0.33215  1.93804 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 2.5635   1.6011  
##  Residual             0.6808   0.8251  
## Number of obs: 20, groups:  ID, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   0.7500     0.5696   1.317
## group2        1.5800     0.3690   4.282
## 
## Correlation of Fixed Effects:
##        (Intr)
## group2 -0.324
summary(nlme::lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML"))
## Linear mixed-effects model fit by maximum likelihood
##   Data: sleep 
##        AIC      BIC    logLik
##   78.50469 82.48762 -35.25235
## 
## Random effects:
##  Formula: ~1 | ID
##         (Intercept)  Residual
## StdDev:    1.601093 0.8251061
## 
## Fixed effects:  extra ~ group 
##             Value Std.Error DF  t-value p-value
## (Intercept)  0.75 0.6003980  9 1.249171  0.2431
## group2       1.58 0.3889587  9 4.062127  0.0028
##  Correlation: 
##        (Intr)
## group2 -0.324
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -1.72209551 -0.36004711  0.03527154  0.33215115  1.93803983 
## 
## Number of Observations: 20
## Number of Groups: 10

 

Approximating Bayes Factors from Bayesian Information Criterion (BIC)

\[\begin{equation} \tag{16} \mathit{BF}_{10}\approx\exp\left\{\frac{\text{BIC}(\mathcal{M}_0)-\text{BIC}(\mathcal{M}_1)}{2}\right\} \end{equation}\]

Derivation in Wagenmakers (2007, Appendix B).

Discussion in Nathoo and Masson (2016).

fit1_LME <- lme4::lmer(extra ~ group + (1 | ID), sleep, REML=F)
fit0_LME <- lme4::lmer(extra ~         (1 | ID), sleep, REML=F)
exp((BIC(fit0_LME) - BIC(fit1_LME)) / 2) # Approx. BF10
## [1] 40.83649
fit1_LMEr <- nlme::lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML")
fit0_LMEr <- nlme::lme(fixed=extra ~ 1,     random=~1 | ID, sleep, method="ML")
exp((BIC(fit0_LMEr) - BIC(fit1_LMEr)) / 2) # Approx. BF10
## [1] 40.83649

 

It remains to be investigated whether these mixed-model BIC values can be plugged into Eq. 16 directly; in particular, the sample size entering the BIC penalty is ambiguous for clustered data (number of observations versus number of subjects), as sketched below.
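A sketch of where the ambiguity arises (using the fits above; \(k\) is the number of estimated parameters): BIC() penalizes with the number of observations, whereas one could argue for the number of independent subjects in Eq. 16.

k1 <- attr(logLik(fit1_LME), "df")                       # parameters in M1 (here 4)
-2*as.numeric(logLik(fit1_LME)) + k1*log(nobs(fit1_LME)) # n = 20 observations; equals BIC(fit1_LME)
-2*as.numeric(logLik(fit1_LME)) + k1*log(10)             # n = 10 subjects gives a different penalty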

 


 

getME(fit1_LME, "Z") # random-effects model matrix Z
## 20 x 10 sparse Matrix of class "dgCMatrix"
##   [[ suppressing 10 column names '1', '2', '3' ... ]]
##                       
## 1  1 . . . . . . . . .
## 2  . 1 . . . . . . . .
## 3  . . 1 . . . . . . .
## 4  . . . 1 . . . . . .
## 5  . . . . 1 . . . . .
## 6  . . . . . 1 . . . .
## 7  . . . . . . 1 . . .
## 8  . . . . . . . 1 . .
## 9  . . . . . . . . 1 .
## 10 . . . . . . . . . 1
## 11 1 . . . . . . . . .
## 12 . 1 . . . . . . . .
## 13 . . 1 . . . . . . .
## 14 . . . 1 . . . . . .
## 15 . . . . 1 . . . . .
## 16 . . . . . 1 . . . .
## 17 . . . . . . 1 . . .
## 18 . . . . . . . 1 . .
## 19 . . . . . . . . 1 .
## 20 . . . . . . . . . 1
kronecker(rep(1,2), diag(N)) # random-effects model matrix Z
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    0    0    0    0    0    0    0    0     0
##  [2,]    0    1    0    0    0    0    0    0    0     0
##  [3,]    0    0    1    0    0    0    0    0    0     0
##  [4,]    0    0    0    1    0    0    0    0    0     0
##  [5,]    0    0    0    0    1    0    0    0    0     0
##  [6,]    0    0    0    0    0    1    0    0    0     0
##  [7,]    0    0    0    0    0    0    1    0    0     0
##  [8,]    0    0    0    0    0    0    0    1    0     0
##  [9,]    0    0    0    0    0    0    0    0    1     0
## [10,]    0    0    0    0    0    0    0    0    0     1
## [11,]    1    0    0    0    0    0    0    0    0     0
## [12,]    0    1    0    0    0    0    0    0    0     0
## [13,]    0    0    1    0    0    0    0    0    0     0
## [14,]    0    0    0    1    0    0    0    0    0     0
## [15,]    0    0    0    0    1    0    0    0    0     0
## [16,]    0    0    0    0    0    1    0    0    0     0
## [17,]    0    0    0    0    0    0    1    0    0     0
## [18,]    0    0    0    0    0    0    0    1    0     0
## [19,]    0    0    0    0    0    0    0    0    1     0
## [20,]    0    0    0    0    0    0    0    0    0     1
model.matrix(fit1_LME) # design matrix under dummy (treatment) coding
##    (Intercept) group2
## 1            1      0
## 2            1      0
## 3            1      0
## 4            1      0
## 5            1      0
## 6            1      0
## 7            1      0
## 8            1      0
## 9            1      0
## 10           1      0
## 11           1      1
## 12           1      1
## 13           1      1
## 14           1      1
## 15           1      1
## 16           1      1
## 17           1      1
## 18           1      1
## 19           1      1
## 20           1      1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
## 
## attr(,"msgScaleX")
## character(0)
kronecker(diag(2), rep(1,N)) # design matrix for contrast (sum) coding
##       [,1] [,2]
##  [1,]    1    0
##  [2,]    1    0
##  [3,]    1    0
##  [4,]    1    0
##  [5,]    1    0
##  [6,]    1    0
##  [7,]    1    0
##  [8,]    1    0
##  [9,]    1    0
## [10,]    1    0
## [11,]    0    1
## [12,]    0    1
## [13,]    0    1
## [14,]    0    1
## [15,]    0    1
## [16,]    0    1
## [17,]    0    1
## [18,]    0    1
## [19,]    0    1
## [20,]    0    1

 


   

Bayesian \(t\)-Test

 

Bayesian \(t\)-Test

\[\begin{equation*} \tag{2} y_i\overset{\text{i.i.d.}}{\sim}\mathcal{N}(\sigma_\epsilon \delta,\ \sigma_\epsilon^2)\quad\text{for }i=1,\dotsb,N,\text{ where }\delta=\mu\ /\ \sigma_\epsilon \end{equation*}\]

\[\begin{equation*} \tag{3} \pi(\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]

\[\begin{equation*} \tag{4} \delta\sim\text{Cauchy}(0,\ h)=\frac{h}{(\delta^2+h^2)\pi} \end{equation*}\]

The Jeffreys-Zellner-Siow (JZS) Bayes factor for the Bayesian \(t\)-test, \(\mathcal{H}_0:\ \delta=0\) versus \(\mathcal{H}_1:\ \delta\neq 0\), is computed by integrating across one dimension in Eq. 5 (Ly et al., 2016, p. 24; Rouder et al., 2012, p. 360; Rouder et al., 2009, p. 237; Wei, Nathoo, & Masson, 2023, p. 1762; see also the balanced one-way between-subjects ANOVA in Morey et al., 2011, p. 374), using Gaussian quadrature.

\[\begin{equation*} \tag{5} \textit{JZS-BF}_{10}=(2\pi)^{-\frac{1}{2}}h\left(1+\frac{t^2}{\nu}\right)^{\frac{\nu+1}{2}}\int_0^\infty(1+Ng)^{-\frac{1}{2}}\left(1+\frac{t^2}{(1+Ng)\nu}\right)^{-\frac{\nu+1}{2}}g^{-\frac{3}{2}}e^{-\frac{h^2}{2g}}\text{d}g, \end{equation*}\]

where the default scale \(h=\sqrt{2}/2\) (version 0.9.2 and later), the degrees of freedom \(\nu=N-1\), \(\quad t=\bar{y}\sqrt{N}/s_y\), \(\quad\bar{y}\) is the sample mean, and \(s_y\) is the sample standard deviation in a one-sample \(t\)-test.

 

Unlike the anovaBF function, the ttestBF function of the “BayesFactor” R package yields negligible Monte Carlo errors and does not require specifying the number of Markov chain Monte Carlo iterations.

 

BayesFactor::ttestBF(resp_diff)
## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 17.25888 ±0%
## 
## Against denominator:
##   Null, mu = 0 
## ---
## Bayes factor type: BFoneSample, JZS

 


   

JZSBF10 <- function(h=sqrt(2)/2, t, N) {
  #' Input - 
  #' h:   the scale parameter of the Cauchy prior
  #' t:   the observed t test statistic
  #' N:   the sample size
  #' Output - the JZS Bayes factor for a one-sample t-test
  nu <- N - 1
  coef <- (2*pi)^(-0.5) * h * (1+t^2/nu)^((nu+1)/2)
  integrand <- function(g) {
    (1+N*g)^(-0.5) * (1+t^2/((1+N*g)*nu))^(-(nu+1)/2) * g^(-1.5) * exp(-h^2/(2*g))
  }
  coef * integrate(integrand, lower=0, upper=Inf)$value
}
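As a sanity check, plugging the paired-test statistic into this implementation should recover the earlier result (fit_t comes from the paired \(t\)-test section; resp_diff has 10 observations, and note that N is reassigned to 20 in the next chunk):

JZSBF10(t=fit_t$statistic, N=10) # ~17.25888, matching ttestBF and ttest.tstat above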

N <- 20 #sample size
t_obs <- seq(0,5,by=0.2) #observed t test statistics
NSim <- length(t_obs)
pVal <- BF10 <- numeric(NSim)
for (i in 1:NSim) {
  pVal[i] <- pt(t_obs[i], df=N-1, lower.tail=F)*2
  BF10[i] <- JZSBF10(t=t_obs[i], N=N)
}

par(mfrow=c(1,2))
plot(pVal, 1/BF10,
     main="The Theoretical Relationship",
     sub=bquote(italic(N) == .(N)),
     xlab=expression(italic("p")~"-Value"),
     ylab=expression(italic("BF")["01"])) #closely match the empirical results
abline(v=.05, col="red")
abline(h=c(1/3,3), lty=2)

plot(t_obs, BF10,
     main="The Theoretical Relationship",
     sub=bquote(italic(N) == .(N)),
     xlab=expression("|"~italic("t")~"|-Statistic"),
     ylab=expression(italic("BF")[10]))

 

\[ \begin{align*} &\lim_{N\to\infty}\textit{JZS-BF}_{10}\left(t(N),\ N\right)= \begin{cases} 0, & \text{if $\delta=0$} \\ \infty, & \text{if $\delta\neq0$} \end{cases} \\ \\ &\lim_{t\to\infty}\textit{JZS-BF}_{10}\left(t(N),\ N\right)=\infty,\quad\forall N \end{align*} \]

 

The resulting \(\textit{JZS-BF}_{10}\) avoids two critical issues (a numeric check follows below):

  • Bartlett’s paradox: with an arbitrarily diffuse prior on the effect, the Bayes factor approaches zero regardless of the data.

  • Information paradox: the Bayes factor stays bounded even as the evidence (\(|t|\to\infty\)) becomes overwhelming.
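A numeric illustration of both limits, using the JZSBF10() function defined above (a sketch; exact values depend on the quadrature):

# No information paradox: BF10 is unbounded in |t| for fixed N
sapply(c(5, 10, 20, 40), function(t) JZSBF10(t=t, N=20))

# Consistency in N: with t = delta*sqrt(N) for a fixed nonzero delta, BF10 diverges;
# with delta = 0 (t = 0), BF10 shrinks toward 0
delta <- 0.5
sapply(c(20, 80, 320), function(n) JZSBF10(t=delta*sqrt(n), N=n))
sapply(c(20, 80, 320), function(n) JZSBF10(t=0, N=n))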

 


   

Bayesian RMANOVA

 

Bayesian RMANOVA

Scaling \(\alpha_i=\sigma_\epsilon\cdot t_i\) and \(s_k=\sigma_\epsilon\cdot b_k\).

\[\begin{equation*} \tag{6} \pi(\mu,\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]

\[\begin{equation*} \tag{7} t_i^{\star}\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation*}\]

\[\begin{equation*} \tag{8} b_k\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation*}\]

\[\begin{equation*} \tag{9} (t_1^{\star},\dotsb,t_{a-1}^{\star})=(t_1,\dotsb,t_{a})\cdot\mathbf{Q} \end{equation*}\]

\[\begin{equation*} \tag{10} \mathbf{I}_a-a^{-1}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q^{\top}}, \end{equation*}\]

where \(\mathbf{Q}\) is an \(a\times (a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of Eq. 10. For example, \(t^{\star}=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^{\star}/\sqrt{2}\) and \(t_2=-t^{\star}/\sqrt{2}\).

\[\begin{equation*} \tag{11} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation*}\]

\[\begin{equation*} \tag{12} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation*}\]

By default, \(h=0.5\) for the fixed effects and \(h_b=1\) for the random effects.
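\(\mathbf{Q}\) is not unique; any orthonormal basis of the contrast space satisfies Eq. 10. A minimal R sketch via the eigendecomposition of the centering matrix (the computeQ() helper in the Stan section below takes a QR-based route instead):

a <- 2
S <- diag(a) - matrix(1, a, a) / a # I_a - J_a / a
eig <- eigen(S, symmetric=T)
(Q <- eig$vectors[, eig$values > 1e-8, drop=F]) # a x (a-1); here (1, -1)/sqrt(2) up to sign
Q %*% t(Q) - S                                  # ~0: recovers the left-hand side of Eq. 10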

 

Default functions in “BayesFactor” version 0.9.2 and later return the same Bayes factor estimates for the independent-samples \(t\)-test and one-way ANOVA with two conditions. However, those functions return systematically different estimates for the paired \(t\)-test and one-way RMANOVA with two conditions (see discussion and vignettes).

 

set.seed(277)
(fit_anovaBF <- BayesFactor::anovaBF(extra ~ group + ID, data=sleep, whichRandom="ID", progress=F))
## Bayes factor analysis
## --------------
## [1] group + ID : 11.4676 ±0.68%
## 
## Against denominator:
##   extra ~ ID 
## ---
## Bayes factor type: BFlinearModel, JZS
set.seed(277)
chains <- BayesFactor::posterior(fit_anovaBF, iterations=1e5, progress=F)
summary(chains) # sample from the posterior distribution
## 
## Iterations = 1:1e+05
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1e+05 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##            Mean       SD  Naive SE Time-series SE
## mu       1.5377   0.6045 0.0019116       0.001912
## group-1 -0.6719   0.2473 0.0007822       0.001045
## group-2  0.6719   0.2473 0.0007822       0.001045
## ID-1    -0.1900   0.8426 0.0026645       0.002678
## ID-2    -1.5586   0.8735 0.0027622       0.003546
## ID-3    -0.8780   0.8496 0.0026868       0.002872
## ID-4    -1.6788   0.8822 0.0027897       0.003723
## ID-5    -1.3207   0.8640 0.0027323       0.003252
## ID-6     1.8972   0.8953 0.0028311       0.003926
## ID-7     2.4646   0.9274 0.0029326       0.004672
## ID-8    -0.2691   0.8395 0.0026548       0.002679
## ID-9     0.6162   0.8426 0.0026645       0.002729
## ID-10    0.9319   0.8542 0.0027014       0.002966
## sig2     1.1762   0.6651 0.0021033       0.005588
## g_group  8.2156 250.8629 0.7932981       0.864024
## g_ID     3.4267   3.1089 0.0098311       0.021266
## 
## 2. Quantiles for each variable:
## 
##             2.5%      25%     50%     75%    97.5%
## mu       0.32371  1.16217  1.5378  1.9145  2.74944
## group-1 -1.14470 -0.83322 -0.6799 -0.5160 -0.16008
## group-2  0.16008  0.51603  0.6799  0.8332  1.14470
## ID-1    -1.86264 -0.73402 -0.1856  0.3547  1.46986
## ID-2    -3.29995 -2.12177 -1.5567 -0.9892  0.17148
## ID-3    -2.57498 -1.42979 -0.8744 -0.3256  0.79468
## ID-4    -3.42780 -2.25151 -1.6776 -1.1035  0.04571
## ID-5    -3.04137 -1.88045 -1.3184 -0.7577  0.38116
## ID-6     0.13118  1.31545  1.8961  2.4733  3.67944
## ID-7     0.60364  1.86403  2.4774  3.0706  4.27811
## ID-8    -1.93423 -0.80958 -0.2693  0.2701  1.40519
## ID-9    -1.04457  0.07187  0.6094  1.1592  2.29900
## ID-10   -0.74731  0.37742  0.9293  1.4818  2.63244
## sig2     0.41972  0.72660  1.0076  1.4362  2.91251
## g_group  0.08898  0.36931  0.8654  2.2938 27.20602
## g_ID     0.43536  1.46264  2.5827  4.3739 11.47310
BayesFactor::model.matrix(fit_anovaBF) # design matrices X[,-1]·Q and Z
##      group_redu_1 ID-1 ID-2 ID-3 ID-4 ID-5 ID-6 ID-7 ID-8 ID-9 ID-10
## 1  1   -0.7071068    1    0    0    0    0    0    0    0    0     0
## 2  1   -0.7071068    0    1    0    0    0    0    0    0    0     0
## 3  1   -0.7071068    0    0    1    0    0    0    0    0    0     0
## 4  1   -0.7071068    0    0    0    1    0    0    0    0    0     0
## 5  1   -0.7071068    0    0    0    0    1    0    0    0    0     0
## 6  1   -0.7071068    0    0    0    0    0    1    0    0    0     0
## 7  1   -0.7071068    0    0    0    0    0    0    1    0    0     0
## 8  1   -0.7071068    0    0    0    0    0    0    0    1    0     0
## 9  1   -0.7071068    0    0    0    0    0    0    0    0    1     0
## 10 1   -0.7071068    0    0    0    0    0    0    0    0    0     1
## 11 1    0.7071068    1    0    0    0    0    0    0    0    0     0
## 12 1    0.7071068    0    1    0    0    0    0    0    0    0     0
## 13 1    0.7071068    0    0    1    0    0    0    0    0    0     0
## 14 1    0.7071068    0    0    0    1    0    0    0    0    0     0
## 15 1    0.7071068    0    0    0    0    1    0    0    0    0     0
## 16 1    0.7071068    0    0    0    0    0    1    0    0    0     0
## 17 1    0.7071068    0    0    0    0    0    0    1    0    0     0
## 18 1    0.7071068    0    0    0    0    0    0    0    1    0     0
## 19 1    0.7071068    0    0    0    0    0    0    0    0    1     0
## 20 1    0.7071068    0    0    0    0    0    0    0    0    0     1
## attr(,"gMap")
## intercept     group        ID        ID        ID        ID        ID        ID 
##        NA         0         1         1         1         1         1         1 
##        ID        ID        ID        ID 
##         1         1         1         1
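To probe how much of the discrepancy is due to the prior scale alone, the scales can be passed explicitly (a sketch; rscaleFixed and rscaleRandom are documented arguments of anovaBF, with defaults "medium" = 0.5 and "nuisance" = 1). Raising the fixed-effect scale to \(\sqrt{2}/2\), the value ttestBF uses, changes the Bayes factor but should not be expected to reproduce 17.25888, because the model structures still differ:

set.seed(277)
BayesFactor::anovaBF(extra ~ group + ID, data=sleep, whichRandom="ID",
                     rscaleFixed=sqrt(2)/2, # default 0.5, i.e., h in Eq. 11
                     rscaleRandom=1,        # default 1,   i.e., h_b in Eq. 12
                     progress=F)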

 


   

R-INLA

 

Integrated Nested Laplace Approximation (INLA)

 

Gaussian Process (GP) Regression Model

  • \(Y_{ik}\), the response variable for subject \(k\) under condition \(i\) in Eq. 1.

\[\begin{align*} \tag{13} \mathbf{y}=\mathbf{X}\boldsymbol{\beta}+&\mathbf{Z}\mathbf{b}+\boldsymbol{\epsilon} \\ \mathbf{y}_k=&\mathbf{X}_k\boldsymbol{\beta}+\mathbf{Z}_k\mathbf{b}_k+\boldsymbol{\epsilon}_k \\ &Y_{ik}=\underset{\tiny\color{gray}{\begin{array}{r} \text{fixed} \\ \text{effects} \end{array}}}{\mathbf{x}_{ik}^\top\boldsymbol{\beta}}+\underset{\tiny\color{gray}{\begin{array}{r} \text{random} \\ \text{effects} \end{array}}}{\mathbf{z}_{ik}^\top\mathbf{b}_k}+\epsilon_{ik},\quad\epsilon_{ik}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\ \sigma_\epsilon^2) \\ &\qquad\text{for }i=1,\dotsb,a;\quad k=1,\dotsb,n \end{align*}\] where \(\mathbf{b}\) is a latent process capturing the dependence. See f() in the INLA formula.

\(\color{lightgray}{\underset{an\times1}{\mathbf{y}}\equiv(\mathbf{y}_1^\top,\dotsb,\mathbf{y}_n^\top)^\top\equiv\big((Y_{11},\dotsb,Y_{a1}),\dotsb,(Y_{1n},\dotsb,Y_{an})\big)^\top}\)

\(\color{lightgray}{\underset{an\times (a+1)}{\hspace{-1.4em}\mathbf{X}}\hspace{-0.9em}\equiv(\mathbf{X}_1^\top,\dotsb,\mathbf{X}_n^\top)^\top\equiv\big((\underset{(a+1)\times1}{\mathbf{x}_{11}},\dotsb,\mathbf{x}_{a1}),\dotsb,(\mathbf{x}_{1n},\dotsb,\mathbf{x}_{an})\big)^\top}\)

\(\color{lightgray}{\underset{an\times nq}{\hspace{-0.6em}\mathbf{Z}}\hspace{-0.9em}\equiv\text{diag}(\mathbf{Z}_1,\dotsb,\mathbf{Z}_n)\equiv\text{diag}\big((\underset{q\times1}{\mathbf{z}_{11}},\dotsb,\mathbf{z}_{a1})^\top,\dotsb,(\mathbf{z}_{1n},\dotsb,\mathbf{z}_{an})^\top\big)}\)

\(\color{lightgray}{\underset{nq\times1}{\mathbf{b}}\equiv(\mathbf{b}_1^\top,\dotsb,\mathbf{b}_n^\top)^\top\equiv\big((b_{11},\dotsb,b_{1q}),\dotsb,(b_{n1},\dotsb,b_{nq})\big)^\top\qquad\qquad\underset{(a+1)\times1}{\boldsymbol{\beta}}\equiv(\beta_0,\dotsb,\beta_{a})^\top}\)

  • \(\color{lightgray}{a=2,\ n=10,\ \text{and}\ q=1\quad\text{in this case}}\)

 

Latent GP \[\begin{equation*} \boldsymbol{\mathcal{N}}_{an}\left(\mathbf{y}\mid\mathbf{X}\boldsymbol{\beta}+\mathbf{Z}\mathbf{b},\ \sigma_\epsilon^2\!\cdot\!\mathbf{I}\right)\ \times\ \boldsymbol{\mathcal{N}}_{nq}\left(\mathbf{b}\mid\mathbf{0},\ \mathbf{G}\right)\ \times\ \pi(\sigma_\epsilon^2,\ \mathbf{G}) \end{equation*}\]

Response GP \[\begin{equation*} \boldsymbol{\mathcal{N}}_{an}\left(\mathbf{y}\mid\mathbf{X}\boldsymbol{\beta},\ \mathbf{ZGZ}^\top+\sigma_\epsilon^2\!\cdot\!\mathbf{I}\right)\times\ \pi(\sigma_\epsilon^2,\ \mathbf{G}) \end{equation*}\]

 

  • iid: Independent and identically distributed Gaussian random effect.

\[\begin{equation*} \tag{14} \mathbf{b}\mid\mathbf{G}\sim\boldsymbol{\mathcal{N}}_{nq}\left(\mathbf{0},\ \mathbf{G}=\text{diag}(g_1\dotsb,g_q,\dotsb,g_1,\dotsb,g_q)\right)\ \perp \!\!\! \perp\ \boldsymbol{\epsilon}\mid\sigma_\epsilon^2\sim\boldsymbol{\mathcal{N}}_{an}\left(\mathbf{0},\ \sigma_\epsilon^2\!\cdot\!\mathbf{I}\right) \end{equation*}\]

 

Gamma(1, 0.00005)

 

By default, the intercept \(\beta_0\sim\mathcal{N}(0,\ 0^{-1})\) and the remaining fixed effects \(\beta_i\sim\mathcal{N}(0,\ 0.001^{-1})\) for \(i=1,\dotsb,a\)
(the second parameter is specified as a precision, i.e., the inverse of the variance).

As for the error \(\epsilon_{ik}\) and subject-specific random effect in this case, precisions \(\sigma_\epsilon^{-2},\ g_1^{-1}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005)=\text{Exp}(0.00005)\).

Equivalently, their variances \(\sigma_\epsilon^2,\ g_1\sim\text{Inv-Gamma}(1,\ \color{gray}{\text{scale = }}0.00005)\).   Or, \(\sigma_\epsilon^2,\ g_1\sim\text{Scale-inv-}\chi^2(2,\ \color{gray}{h^2=}0.00005)\).

 

Do these priors always work? Probably not.

 

par(mfrow=c(1,2)); shape <- 1; rate <- 5E-5
curve(dgamma(x, shape=shape, rate=rate), 0.2, 1E5, xlab="Precision (the Inverse of the Variance)",
      ylab="Density", main=paste0("Gamma Prior (Shape = ",shape,", Rate = ",rate,")"))
curve(dgamma(1/x, shape=shape, rate=rate) / x^2, 0, 5, xlab="Variance", ylab="Density",
      main=paste0("Inverse-Gamma Prior (Shape = ",shape,", Scale = ",rate,")"))

fit1_INLA  <- INLA::inla(extra ~ 1 + group + f(ID, model="iid"),
                         family="gaussian", data=sleep)
# See the default priors for the fixed effects
INLA::inla.set.control.fixed.default(fit1_INLA)[c("mean.intercept", "prec.intercept", "mean", "prec")]
## $mean.intercept
## [1] 0
## 
## $prec.intercept
## [1] 0
## 
## $mean
## [1] 0
## 
## $prec
## [1] 0.001
summary(fit1_INLA)
## Time used:
##     Pre = 0.585, Running = 0.203, Post = 0.00998, Total = 0.798 
## Fixed effects:
##             mean    sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.75 0.577     -0.394     0.75      1.894 0.75   0
## group2      1.58 0.405      0.772     1.58      2.388 1.58   0
## 
## Random effects:
##   Name     Model
##     ID IID model
## 
## Model hyperparameters:
##                                          mean    sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.654 0.746      0.618    1.516
## Precision for ID                        0.508 0.269      0.172    0.449
##                                         0.975quant  mode
## Precision for the Gaussian observations       3.49 1.264
## Precision for ID                              1.20 0.351
## 
## Marginal log-Likelihood:  -59.42 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
fit0_INLA  <- INLA::inla(extra ~ 1 + f(ID, model="iid"),
                         family="gaussian", data=sleep)
summary(fit0_INLA)
## Time used:
##     Pre = 0.523, Running = 0.176, Post = 0.00634, Total = 0.705 
## Fixed effects:
##             mean    sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 1.54 0.547      0.451     1.54      2.629 1.54   0
## 
## Random effects:
##   Name     Model
##     ID IID model
## 
## Model hyperparameters:
##                                          mean    sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.501 0.229      0.157    0.464
## Precision for ID                        2.744 5.445      0.312    1.295
##                                         0.975quant  mode
## Precision for the Gaussian observations       1.03 0.385
## Precision for ID                             14.58 0.539
## 
## Marginal log-Likelihood:  -60.40 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
bf10 <- exp(fit1_INLA$mlik - fit0_INLA$mlik)
rownames(bf10) <- c("integration", "Gaussian"); colnames(bf10) <- "BF10.est"
bf10
##             BF10.est
## integration 6.850923
## Gaussian    2.661885

 


   

Gamma(0.0001, 0.0001)

 

The Jeffreys prior for the standard deviation is \(\pi(\sigma)\propto\frac{1}{\sigma}\) or \(\pi(\ln\sigma)\propto1\) in Eq. 6.
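The connection can be checked visually (a sketch; as shape = rate = \(\varepsilon\to0\), the \(\text{Gamma}(\varepsilon,\varepsilon)\) density on the precision behaves like \(\varepsilon/x\), i.e., the Jeffreys prior up to a constant):

eps <- 1e-4
curve(dgamma(x, shape=eps, rate=eps), 0.01, 100, log="xy",
      xlab="Precision", ylab="Density", main="Gamma(1e-4, 1e-4) vs. c/x")
curve(eps/x, add=T, lty=2, col="red") # the Jeffreys 1/x shape for comparison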

 

# Change the hyperpriors for the precisions of the random effects and error
shape_subj <- rate_subj <- shape_err <- rate_err <- 0.0001
fit1r_INLA  <- INLA::inla(extra ~ 1 + group + f(ID, model="iid",
                                                hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
                          control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
                          family="gaussian", data=sleep)
summary(fit1r_INLA)
## Time used:
##     Pre = 0.528, Running = 0.177, Post = 0.00692, Total = 0.713 
## Fixed effects:
##             mean    sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.75 0.656     -0.554     0.75      2.054 0.75   0
## group2      1.58 0.441      0.698     1.58      2.462 1.58   0
## 
## Random effects:
##   Name     Model
##     ID IID model
## 
## Model hyperparameters:
##                                          mean    sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.396 0.679      0.480    1.263
## Precision for ID                        0.403 0.232      0.121    0.349
##                                         0.975quant  mode
## Precision for the Gaussian observations       3.08 1.023
## Precision for ID                              1.00 0.262
## 
## Marginal log-Likelihood:  -57.32 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
fit0r_INLA  <- INLA::inla(extra ~ 1 + f(ID, model="iid",
                                        hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
                          control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
                          family="gaussian", data=sleep)
summary(fit0r_INLA)
## Time used:
##     Pre = 0.544, Running = 0.175, Post = 0.00649, Total = 0.726 
## Fixed effects:
##             mean    sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 1.54 0.604      0.336     1.54      2.744 1.54   0
## 
## Random effects:
##   Name     Model
##     ID IID model
## 
## Model hyperparameters:
##                                          mean    sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.537 0.246      0.194    0.491
## Precision for ID                        0.680 0.602      0.141    0.508
##                                         0.975quant  mode
## Precision for the Gaussian observations       1.14 0.408
## Precision for ID                              2.28 0.307
## 
## Marginal log-Likelihood:  -57.92 
##  is computed 
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
bf10r <- exp(fit1r_INLA$mlik - fit0r_INLA$mlik)
rownames(bf10r) <- c("integration", "Gaussian"); colnames(bf10r) <- "BF10.est"
bf10r
##             BF10.est
## integration 1.538954
## Gaussian    1.827413

 


   

Stan

 

JZS Priors (anovaBF)

Scaling \(\alpha_i=\sigma_\epsilon\cdot t_i\) and \(s_k=\sigma_\epsilon\cdot b_k\).

\[\begin{equation*} \tag{6} \pi(\mu,\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]

\[\begin{equation*} \tag{7} t_i^{\star}\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation*}\]

\[\begin{equation*} \tag{8} b_k\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation*}\]

\[\begin{equation*} \tag{9} (t_1^{\star},\dotsb,t_{a-1}^{\star})=(t_1,\dotsb,t_{a})\cdot\mathbf{Q} \end{equation*}\]

\[\begin{equation*} \tag{10} \mathbf{I}_a-a^{-1}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q^{\top}}, \end{equation*}\]

where \(\mathbf{Q}\) is an \(a\times (a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of Eq. 10. For example, \(t^{\star}=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^{\star}/\sqrt{2}\) and \(t_2=-t^{\star}/\sqrt{2}\).

\[\begin{equation*} \tag{11} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation*}\]

\[\begin{equation*} \tag{12} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation*}\]

By default, \(h=0.5\) for the fixed effects and \(h_b=1\) for the random effects.

 

Stan Version 2.32.2

Full Model

 

computeQ <- function(C) {
  #' reduced parametrization for the fixed treatment effects
  S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
  e <- qr(S)
  Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
  Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}

dat <- cbind(sleep$extra[1:10], sleep$extra[11:20]) #wide format

 

Specify the full model \(\mathcal{M}_1\) through Eq. 1 & 6-12, the same hierarchical specification as in the Bayesian RMANOVA section above.

stancodeM1 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  matrix[C,C-1] Q;       // projecting C fixed effects into C-1
  real<lower=0> ht;      // (square root) prior scale on the variability of the standardized treatment effects
  real<lower=0> hb;      // (square root) prior scale on the variability of the standardized subject-specific random effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma;  // standard deviation of the error
  real<lower=0> gt;     // variance of the standardized projected fixed treatment effects
  real<lower=0> gb;     // variance of the standardized subject-specific random effects
  vector[C-1] tf;       // projected fixed treatment effects
  vector[N] b;          // subject-specific random effects
}

transformed parameters {
  real<lower=0> eta;    // sd of the projected fixed treatment effects
  real<lower=0> tau;    // sd of the subject-specific random effects
  vector[C] t;          // fixed treatment effects
  eta = sigma * sqrt(gt);
  tau = sigma * sqrt(gb);
  t = Q * tf;
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);                               // Jeffreys prior
  target += scaled_inv_chi_square_lpdf(gt | 1, ht);
  target += scaled_inv_chi_square_lpdf(gb | 1, hb);    // Rouder et al. (2012)
  target += normal_lpdf(tf | 0, eta);
  target += normal_lpdf(b | 0, tau);
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + t + b[i], sigma);
  }
}
"

stanmodelM1 <- stan_model(model_code=stancodeM1)
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
                   "Q"=computeQ(ncol(dat)), "ht"=.5, "hb"=1)
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]])
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu 1.5429 0.0029 0.5998 0.3453 1.1668 1.5436 1.9202 2.7306 42632.96 1.0000
sigma 1.0525 0.0012 0.2753 0.6474 0.8549 1.0066 1.2030 1.7107 50887.75 1.0001
gt 24.5569 13.7133 3136.2398 0.0899 0.3682 0.8558 2.2434 27.8077 52303.94 1.0001
gb 3.4158 0.0125 3.1561 0.4290 1.4501 2.5554 4.3446 11.4854 63610.94 1.0000
tf[1] -0.9503 0.0009 0.3489 -1.6196 -1.1766 -0.9592 -0.7303 -0.2346 143712.64 1.0000
b[1] -0.1964 0.0031 0.8369 -1.8620 -0.7365 -0.1941 0.3461 1.4624 74187.22 1.0000
b[2] -1.5612 0.0033 0.8774 -3.3033 -2.1335 -1.5616 -0.9895 0.1741 70424.57 1.0000
b[3] -0.8774 0.0031 0.8452 -2.5593 -1.4226 -0.8771 -0.3269 0.7867 73447.63 1.0000
b[4] -1.6794 0.0033 0.8831 -3.4211 -2.2592 -1.6793 -1.1015 0.0727 70409.05 1.0001
b[5] -1.3187 0.0032 0.8633 -3.0322 -1.8805 -1.3182 -0.7529 0.3819 71953.32 1.0000
b[6] 1.8934 0.0034 0.8893 0.1398 1.3112 1.8952 2.4715 3.6574 69446.08 1.0000
b[7] 2.4534 0.0036 0.9275 0.6048 1.8519 2.4595 3.0588 4.2752 67362.03 1.0000
b[8] -0.2765 0.0031 0.8379 -1.9454 -0.8190 -0.2769 0.2670 1.3893 73105.46 1.0000
b[9] 0.6095 0.0031 0.8422 -1.0498 0.0617 0.6047 1.1569 2.2874 74140.44 1.0000
b[10] 0.9273 0.0032 0.8500 -0.7428 0.3740 0.9209 1.4768 2.6212 72503.73 1.0000
eta 1.4274 0.0183 4.1534 0.3372 0.6318 0.9309 1.4800 5.1616 51452.88 1.0002
tau 1.6699 0.0017 0.5069 0.9025 1.3220 1.5939 1.9307 2.8713 85869.62 1.0000
t[1] -0.6720 0.0007 0.2467 -1.1452 -0.8320 -0.6782 -0.5164 -0.1659 143712.64 1.0000
t[2] 0.6720 0.0007 0.2467 0.1659 0.5164 0.6782 0.8320 1.1452 143712.64 1.0000
lp__ -55.8424 0.0182 3.6592 -64.1114 -58.0465 -55.4330 -53.2032 -49.8715 40396.39 1.0001

 


   

Note that to compute the (log) marginal likelihood for a Stan model, we need to specify the model in a certain way. Instead of using “~” signs for specifying distributions, we need to directly use the (log) density functions. The reason for this is that when using the “~” sign, constant terms are dropped which are not needed for sampling from the posterior. However, for computing the marginal likelihood, these constants need to be retained. For instance, instead of writing y ~ normal(mu, sigma); we would need to write target += normal_lpdf(y | mu, sigma); (Gronau, 2021). See also the discussion regarding the implicit uniform prior for computing the log marginal likelihood.

 

stancodeM1a <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  matrix[C,C-1] Q;       // projecting C fixed effects into C-1
  real<lower=0> ht;      // (square root) prior scale on the variability of the standardized treatment effects
  real<lower=0> hb;      // (square root) prior scale on the variability of the standardized subject-specific random effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma;  // standard deviation of the error
  real<lower=0> gt;     // variance of the standardized projected fixed treatment effects
  real<lower=0> gb;     // variance of the standardized subject-specific random effects
  vector[C-1] tf;       // projected fixed treatment effects
  vector[N] b;          // subject-specific random effects
}

transformed parameters {
  real<lower=0> eta;    // sd of the projected fixed treatment effects
  real<lower=0> tau;    // sd of the subject-specific random effects
  vector[C] t;          // fixed treatment effects
  eta = sigma * sqrt(gt);
  tau = sigma * sqrt(gb);
  t = Q * tf;
}

model { // traditional style
  // linear mixed-effects model
  for (i in 1:N) {
    Y[i] ~ normal(mu + t + b[i], sigma);
  }
  tf ~ normal(0, eta);
  b ~ normal(0, tau);

  // priors
  // mu ~ implicit uniform prior
  mu ~ uniform(lwr, upr);
  target += -log(sigma);             // Jeffreys prior
  gt ~ scaled_inv_chi_square(1, ht);
  gb ~ scaled_inv_chi_square(1, hb); // Rouder et al. (2012)
}
"

stanmodelM1a <- stan_model(model_code=stancodeM1a)
stanfitM1a <- sampling(stanmodelM1a, data=datalistM1,
                       iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1a)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu 1.5425 0.0030 0.6085 0.3306 1.1642 1.5421 1.9230 2.7477 40568.46 1.0000
sigma 1.0510 0.0012 0.2735 0.6488 0.8537 1.0062 1.1997 1.7022 48980.81 1.0001
gt 10.5234 1.6789 457.2382 0.0887 0.3692 0.8589 2.2655 28.0892 74169.06 1.0000
gb 3.4219 0.0126 3.1326 0.4317 1.4573 2.5730 4.3529 11.4506 62129.53 1.0001
tf[1] -0.9515 0.0009 0.3504 -1.6268 -1.1786 -0.9622 -0.7309 -0.2346 137333.73 1.0000
b[1] -0.1938 0.0032 0.8439 -1.8716 -0.7372 -0.1933 0.3493 1.4684 69790.04 1.0000
b[2] -1.5616 0.0034 0.8770 -3.3097 -2.1303 -1.5608 -0.9883 0.1607 66934.72 1.0000
b[3] -0.8795 0.0032 0.8502 -2.5822 -1.4278 -0.8775 -0.3284 0.7960 68916.69 1.0000
b[4] -1.6831 0.0034 0.8860 -3.4397 -2.2572 -1.6840 -1.1013 0.0580 67867.92 1.0000
b[5] -1.3218 0.0033 0.8700 -3.0498 -1.8833 -1.3175 -0.7562 0.3815 67512.84 1.0000
b[6] 1.8940 0.0035 0.8957 0.1396 1.3082 1.8942 2.4770 3.6672 66563.59 1.0000
b[7] 2.4578 0.0037 0.9285 0.6017 1.8527 2.4654 3.0631 4.2832 64594.57 1.0000
b[8] -0.2726 0.0032 0.8455 -1.9510 -0.8162 -0.2706 0.2728 1.4003 70731.43 1.0000
b[9] 0.6079 0.0032 0.8485 -1.0543 0.0537 0.6030 1.1553 2.3051 69301.96 1.0000
b[10] 0.9298 0.0033 0.8544 -0.7381 0.3733 0.9254 1.4788 2.6332 68189.18 1.0000
eta 1.4141 0.0122 2.9094 0.3372 0.6319 0.9313 1.4834 5.2005 57081.22 1.0001
tau 1.6726 0.0018 0.5117 0.9053 1.3218 1.5953 1.9340 2.8951 84443.88 1.0000
t[1] -0.6728 0.0007 0.2478 -1.1503 -0.8334 -0.6804 -0.5168 -0.1659 137333.73 1.0000
t[2] 0.6728 0.0007 0.2478 0.1659 0.5168 0.6804 0.8334 1.1503 137333.73 1.0000
lp__ -21.6296 0.0188 3.6733 -29.9007 -23.8201 -21.2217 -18.9824 -15.6813 38269.72 1.0001
N <- datalistM1$N; C <- datalistM1$C
ht <- datalistM1$ht; hb <- datalistM1$hb; upr <- datalistM1$upr; lwr <- datalistM1$lwr
const <- log(2*pi) * (-N*C - N - C - 1) / 2 + log(ht) + log(hb) - log(upr-lwr)
head(extract(stanfitM1)$lp__)
## [1] -52.47735 -56.44438 -57.33944 -53.01560 -61.67469 -60.59133
head(extract(stanfitM1a)$lp__) + const
## [1] -53.33232 -53.78627 -57.50837 -50.76387 -52.50371 -55.92028

In this case, lp__ from the “~”-style fit is the log posterior density “up to a constant”: it omits \(\underset{\color{lightgray}{\text{Eq. 1}}}{N\times \ln\left(\frac{1}{2\pi}\right)^{\frac{C}{2}}}+\underset{\color{lightgray}{\text{Eq. 8}}}{\ln\left(\frac{1}{2\pi}\right)^{\frac{N}{2}}}+\underset{\color{lightgray}{\text{Eq. 7}}}{\ln\left(\frac{1}{2\pi}\right)^{\frac{C-1}{2}}}+\underset{\color{lightgray}{\text{Eq. 11}}}{\ln\left(\frac{h_t}{\sqrt{2\pi}}\right)}+\underset{\color{lightgray}{\text{Eq. 12}}}{\ln\left(\frac{h_b}{\sqrt{2\pi}}\right)}+\underset{\color{lightgray}{\text{Eq. 6}}}{\ln\left(\frac{1}{\mathrm{upr}-\mathrm{lwr}}\right)}\), which is exactly the const computed above. \(\qquad\)(Help us verify!)
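A quick check against the fitted values: from the two summaries above, the posterior means of lp__ differ by \(-55.8424-(-21.6296)=-34.2128\), which should equal const up to Monte Carlo error.

const                                                          # the dropped constant
mean(extract(stanfitM1)$lp__) - mean(extract(stanfitM1a)$lp__) # should agree with const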

 

  • The log posterior density: \(\ln p(\theta\mid y)=\ln p(y\mid\theta)+\ln p(\theta)-\ln p(y)\)

lp__ was described as “log density up to a constant” in the manual. Here, log density should be the log likelihood of the observations conditioned on the posterior parameters: p(y | p_post). In the MCMC sampling paradigm, this can be obtained by plugging the parameter samples from each iteration into the likelihood and computing the probability of each observation. Therefore, “lp” is actually a vector whose length equals the number of observations. lp__ in Stan’s output would be the sum of the “lp” vector, which essentially quantifies how well the model matches the data and is thereby useful for model comparison purposes (Wang, 2015).

  • The log marginal likelihood: \(\ln p(y)=\ln\int p(y\mid\theta)\cdot p(\theta)\text{d}\theta\)

 


   

Null Model

 

Specify the null model \(\mathcal{M}_0\) in Eq. 1. Compile and fit the model in Stan.

stancodeM0 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  real<lower=0> hb;      // (square root) prior scale on the variability of the standardized subject-specific random effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma;  // standard deviation of the error
  real<lower=0> gb;     // variance of the standardized subject-specific random effects
  vector[N] b;          // subject-specific random effects
}

transformed parameters {
  real<lower=0> tau;    // sd of the subject-specific random effects
  tau = sigma * sqrt(gb);
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);                               // Jeffreys prior
  target += scaled_inv_chi_square_lpdf(gb | 1, hb);    // Rouder et al. (2012)
  target += normal_lpdf(b | 0, tau);
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + b[i], sigma);
  }
}
"

stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu 1.5414 0.0024 0.6224 0.3019 1.1472 1.5431 1.9359 2.7779 64635.16 1.0000
sigma 1.4830 0.0012 0.3114 0.9919 1.2605 1.4433 1.6617 2.2010 72162.95 1.0000
gb 1.4355 0.0049 1.3088 0.2189 0.6184 1.0695 1.8057 4.8387 72396.89 1.0000
b[1] -0.1609 0.0028 0.9592 -2.0748 -0.7771 -0.1572 0.4539 1.7468 113422.05 1.0000
b[2] -1.2800 0.0032 1.0026 -3.3155 -1.9266 -1.2598 -0.6111 0.6317 100836.77 1.0000
b[3] -0.7165 0.0029 0.9703 -2.6734 -1.3415 -0.7006 -0.0808 1.1752 109457.93 1.0000
b[4] -1.3781 0.0032 1.0090 -3.4260 -2.0344 -1.3522 -0.7026 0.5456 99791.82 1.0000
b[5] -1.0810 0.0031 0.9859 -3.0828 -1.7156 -1.0566 -0.4245 0.8043 102879.75 1.0000
b[6] 1.5531 0.0033 1.0281 -0.4008 0.8636 1.5269 2.2189 3.6531 94992.19 1.0000
b[7] 2.0126 0.0037 1.0767 -0.0327 1.2836 1.9898 2.7191 4.1902 86669.79 1.0000
b[8] -0.2273 0.0028 0.9524 -2.1292 -0.8422 -0.2234 0.3898 1.6593 112544.69 1.0000
b[9] 0.4972 0.0029 0.9626 -1.3850 -0.1275 0.4892 1.1107 2.4334 110967.91 1.0000
b[10] 0.7658 0.0030 0.9681 -1.1104 0.1289 0.7482 1.3878 2.7314 104322.02 1.0000
tau 1.5707 0.0019 0.5259 0.7888 1.2025 1.4903 1.8426 2.8294 77074.27 1.0000
lp__ -58.8016 0.0143 3.1595 -66.1468 -60.6284 -58.3858 -56.5122 -53.8815 48980.42 1.0001

 


   

Bayes Factor via Bridge Sampling

 

Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -42.48485
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 5.182254e-06
##  Coefficient of Variation: 0.002276456
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -44.94092
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 2.883494e-06
##  Coefficient of Variation: 0.001698085
##  Percentage Error: 0%
## 
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 11.65892

 


   

inla Priors

 

\(\mu\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}+\infty)\)

\(\alpha_i\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}0.001^{-1})\)

\(s_k\mid g_1\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}g_1)\)

\(g_1^{-1}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005)\)

\(\sigma_\epsilon^{-2}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005)\)

 

Stan Version 2.32.2

Full Model

 

Specify the full model \(\mathcal{M}_1\) through Eq. 1 and the gamma priors used in R-INLA.

inlacodeM1 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  real<lower=0> shape;   // shape parameter of the inverse-gamma hyperprior
  real<lower=0> scale;   // scale parameter of the inverse-gamma hyperprior
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma2; // variance of the error
  real<lower=0> gb;     // variance of the subject-specific random effects
  vector[C] t;          // treatment effects
  vector[N] b;          // subject-specific random effects
}

model {
  target += normal_lpdf(mu | 0, sqrt(1000)); // wide proper stand-in for INLA's flat (precision 0) intercept prior
  target += normal_lpdf(t | 0, sqrt(1000));  // fixed effects: precision 0.001 <=> variance 1000
  target += normal_lpdf(b | 0, sqrt(gb));
  // X ~ Gamma(a, rate = b)  <==> 1/X ~ Inv-Gamma(a, scale = b)
  target += inv_gamma_lpdf(gb | shape, scale);
  target += inv_gamma_lpdf(sigma2 | shape, scale);
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + t + b[i], sqrt(sigma2));
  }
}
"

inlamodelM1 <- stan_model(model_code=inlacodeM1)
dat <- cbind(sleep$extra[1:10], sleep$extra[11:20]) #wide format
datalist <- list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat, "shape"=1, "scale"=0.00005)
inlafitM1 <- sampling(inlamodelM1, data=datalist,
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0,
                      control=list(adapt_delta=0.95, max_treedepth=15))
## Warning: There were 2609 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
rstan::summary(inlafitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu 1.1474 0.1110 18.2358 -34.3937 -11.2192 1.0240 13.4130 36.9119 26982.2266 1.0001
sigma2 3.5090 0.0315 1.3509 1.0311 2.6481 3.3077 4.1550 6.7545 1839.8623 1.0008
gb 0.0928 0.0336 0.5787 0.0000 0.0000 0.0001 0.0002 1.6543 296.7500 1.0062
t[1] -0.3963 0.1110 18.2385 -36.1635 -12.6427 -0.2727 11.9813 35.1630 27015.0873 1.0001
t[2] 1.1831 0.1109 18.2368 -34.6055 -11.0509 1.2936 13.5693 36.7062 27051.8033 1.0001
b[1] -0.0090 0.0045 0.1637 -0.0785 -0.0062 -0.0001 0.0059 0.0482 1305.0008 1.0026
b[2] -0.0569 0.0205 0.3300 -1.1489 -0.0069 -0.0005 0.0054 0.0297 259.6935 1.0073
b[3] -0.0352 0.0126 0.2381 -0.5221 -0.0067 -0.0004 0.0055 0.0327 355.3115 1.0065
b[4] -0.0659 0.0248 0.3752 -1.3549 -0.0068 -0.0005 0.0053 0.0295 229.4127 1.0092
b[5] -0.0508 0.0187 0.3014 -1.0024 -0.0068 -0.0005 0.0054 0.0306 259.9161 1.0079
b[6] 0.0648 0.0227 0.3623 -0.0296 -0.0053 0.0005 0.0069 1.3301 254.1309 1.0061
b[7] 0.0885 0.0322 0.4784 -0.0285 -0.0053 0.0006 0.0070 2.0714 220.5887 1.0081
b[8] -0.0108 0.0037 0.1526 -0.1178 -0.0063 -0.0001 0.0058 0.0432 1710.2283 1.0014
b[9] 0.0197 0.0067 0.1781 -0.0365 -0.0057 0.0003 0.0065 0.1859 716.7570 1.0020
b[10] 0.0326 0.0116 0.2247 -0.0332 -0.0056 0.0003 0.0067 0.4447 375.7006 1.0043
lp__ -37.2466 0.6244 11.2405 -78.0931 -40.1797 -34.4823 -30.4165 -25.2075 324.1291 1.0058

 


   

Null Model

 

Specify the null model \(\mathcal{M}_0\) in Eq. 1. Compile and fit the model in Stan.

inlacodeM0 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [removed features]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  real<lower=0> shape;   // shape parameter of the inverse-gamma hyperprior
  real<lower=0> scale;   // scale parameter of the inverse-gamma hyperprior
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma2; // variance of the error
  real<lower=0> gb;     // variance of the subject-specific random effects
  vector[N] b;          // subject-specific random effects
}

model {
  target += normal_lpdf(mu | 0, sqrt(1000)); // wide proper stand-in for INLA's flat (precision 0) intercept prior
  target += normal_lpdf(b | 0, sqrt(gb));
  // X ~ Gamma(a, rate = b)  <==> 1/X ~ Inv-Gamma(a, scale = b)
  target += inv_gamma_lpdf(gb | shape, scale);
  target += inv_gamma_lpdf(sigma2 | shape, scale);
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + b[i], sqrt(sigma2));
  }
}
"

inlamodelM0 <- stan_model(model_code=inlacodeM0)
inlafitM0 <- sampling(inlamodelM0, data=datalist,
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0,
                      control=list(adapt_delta=0.95, max_treedepth=15))
rstan::summary(inlafitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
         mean  se_mean     sd     2.5%      25%      50%      75%    97.5%      n_eff   Rhat
mu      1.5367  0.0020 0.4525   0.6418   1.2413   1.5377   1.8322   2.4324  52340.535 1.0001
sigma2  4.0785  0.0063 1.4036   2.1730   3.1031   3.8099   4.7467   7.5480  50270.297 1.0001
gb      0.0010  0.0004 0.0385   0.0000   0.0000   0.0001   0.0002   0.0019   8260.090 1.0004
b[1]    0.0000  0.0001 0.0249  -0.0305  -0.0058   0.0000   0.0058   0.0303 128180.810 1.0000
b[2]   -0.0007  0.0003 0.0317  -0.0316  -0.0059  -0.0001   0.0057   0.0290  15366.105 1.0002
b[3]   -0.0004  0.0002 0.0240  -0.0308  -0.0058   0.0000   0.0057   0.0296  25183.615 1.0001
b[4]   -0.0009  0.0003 0.0326  -0.0314  -0.0060  -0.0001   0.0056   0.0286  13732.789 1.0002
b[5]   -0.0005  0.0002 0.0280  -0.0309  -0.0060  -0.0001   0.0057   0.0297  20845.194 1.0001
b[6]    0.0010  0.0004 0.0377  -0.0289  -0.0056   0.0001   0.0060   0.0314  10404.059 1.0003
b[7]    0.0012  0.0005 0.0425  -0.0290  -0.0056   0.0001   0.0060   0.0327   8644.644 1.0004
b[8]    0.0000  0.0001 0.0252  -0.0305  -0.0058   0.0000   0.0058   0.0304 190492.179 1.0000
b[9]    0.0004  0.0002 0.0279  -0.0299  -0.0057   0.0001   0.0059   0.0311  31671.672 1.0001
b[10]   0.0006  0.0002 0.0288  -0.0298  -0.0056   0.0001   0.0059   0.0313  22085.791 1.0001
lp__  -27.6867  0.0841 7.3340 -45.5047 -31.4658 -26.3063 -22.4387 -17.4948   7602.004 1.0007

 

scroll to top

   

Bayes Factor via Bridge Sampling

 

Compute the log marginal likelihoods for the full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).

inla.M1 <- bridge_sampler(inlafitM1, silent=T)
summary(inla.M1)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -59.85118
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 0.0330446
##  Coefficient of Variation: 0.1817817
##  Percentage Error: 18%
## 
## Note:
## All error measures are approximate.
inla.M0 <- bridge_sampler(inlafitM0, silent=T)
summary(inla.M0)
## 
## Bridge sampling log marginal likelihood estimate 
## (method = "normal", repetitions = 1):
## 
##  -57.69477
## 
## Error Measures:
## 
##  Relative Mean-Squared Error: 0.0001017732
##  Coefficient of Variation: 0.01008827
##  Percentage Error: 1%
## 
## Note:
## All error measures are approximate.
bf(inla.M1, inla.M0)
## Estimated Bayes factor in favor of inla.M1 over inla.M0: 0.11574
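
Equivalently, since bridge_sampler() stores the estimated log marginal likelihood in the logml element, the Bayes factor can be recovered by hand; a value below 1 favors \(\mathcal{M}_0\) under these priors.

exp(inla.M1$logml - inla.M0$logml) # exp{-59.85118 - (-57.69477)} approx. 0.11574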

 

scroll to top

   

JAGS

 

Work (not) in progress: https://rpubs.com/sherloconan/1018077
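
In the meantime, here is a minimal sketch of how the null model \(\mathcal{M}_0\) with the inverse-gamma hyperpriors could be ported to JAGS via rjags. This is illustrative code, not the linked script: JAGS parameterizes the normal by its precision, a Gamma prior on a precision is equivalent to an inverse-gamma prior on the corresponding variance, and the hyperparameter values 0.0001 are placeholders.

library(rjags) # assumes JAGS 4.x is installed

jagscodeM0 <- "
model {
  for (k in 1:N) {
    for (i in 1:C) {
      Y[k,i] ~ dnorm(mu + b[k], prec_e)  # dnorm(mean, precision)
    }
    b[k] ~ dnorm(0, prec_b)              # subject-specific random effects
  }
  mu ~ dnorm(0, 0.001)                   # precision 0.001 <=> variance 1000
  # precision ~ Gamma(shape, rate) <=> variance ~ Inv-Gamma(shape, scale = rate)
  prec_e ~ dgamma(shape, scale)
  prec_b ~ dgamma(shape, scale)
}
"

jagsfitM0 <- jags.model(textConnection(jagscodeM0),
                        data=list(Y=cbind(group1, group2), N=N, C=2,
                                  shape=0.0001, scale=0.0001),
                        n.chains=4, quiet=TRUE)
update(jagsfitM0, 10000) # burn-in
summary(coda.samples(jagsfitM0, c("mu", "prec_e", "prec_b"), n.iter=40000))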

   

R Scripts

 

library(ez) #4.4-0
library(lme4) #1.1-35.3
library(nlme) #3.1-164
library(BayesFactor) #0.9.12-4.7
if(!require(INLA)) { #https://www.r-inla.org/download-install
  install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"),dep=T)
  library(INLA) #24.05.10 (R version 4.4.0)
}
if(!require(rstan)) {install.packages("rstan"); library(rstan)} #2.32.6, macOS 13.6.7 M2
#do not use "bridgesampling" CRAN version 1.1-2 or lower
if(!require(devtools)) {install.packages("devtools")}
if(!require(bridgesampling)) {
  devtools::install_github("quentingronau/bridgesampling@master")
  library(bridgesampling) #1.1-5
}





## Paired or One-Sample t-Test.
group1 <- sleep$extra[1:10]; group2 <- sleep$extra[11:20]; resp_diff <- group1 - group2; N <- length(resp_diff)
t.test(resp_diff)
(fit_t <- t.test(group1, group2, paired=T))
ttest.tstat(t=fit_t$statistic, n1=N) # convert t-statistic to ln(BF10)
ttest.tstat(t=fit_t$statistic, n1=N, simple=T) # convert t-statistic to BF10

shapiro.test(resp_diff) # check the normality assumption
wilcox.test(resp_diff, mu=0, alternative="two.sided")

n_perm <- 2000 # number of permutations
mean_diff <- numeric(n_perm)
for (i in 1:n_perm) {
  # randomly switch the group labels and then compute the mean difference
  set.seed(i)
  mean_diff[i] <- mean(resp_diff * sample(c(-1,1), N, replace=T))
}
hist(mean_diff, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
     main=paste0(n_perm," Permutations of Raw Data"))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff), col="red", lwd=2)
sum(abs(mean_diff) >= abs(mean(resp_diff))) / n_perm # estimated p-value

mean_diff_exact <- colMeans(apply(expand.grid(rep(list(c(-1, 1)), N)),
                                  1, function(signs) resp_diff * signs))
hist(mean_diff_exact, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
     main=substitute(2^exp * " Unique Permutations of Raw Data", list(exp=N)))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff_exact), col="red", lwd=2)
sum(abs(mean_diff_exact) >= abs(mean(resp_diff))) / (2^N) # exact p-value


## Repeated-Measures Analysis of Variance (RMANOVA)
summary(aov(extra ~ group + Error(ID/group), sleep)) # one-way RMANOVA
summary(aov(extra ~ group + ID, sleep)) # two-factor ANOVA
ezANOVA(sleep, dv=extra, wid=ID, within=.(group), type=2, detailed=T, return_aov=T)
?oneWayAOV.Fstat
friedman.test(extra ~ group | ID, sleep)


## Linear Mixed-Effects Regression (LMER)
summary(lmer(extra ~ group + (1 | ID), sleep, REML=F)) # random intercept for each subject
summary(lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML"))

fit1_LME <- lmer(extra ~ group + (1 | ID), sleep, REML=F)
fit0_LME <- lmer(extra ~         (1 | ID), sleep, REML=F)
exp((BIC(fit0_LME)-BIC(fit1_LME))/2) # Approx. BF10

getME(fit1_LME, "Z") # random-effects model matrix Z
kronecker(rep(1,2), diag(N)) # random-effects model matrix Z
model.matrix(fit1_LME) # design matrix for dummy (one-hot) coding
kronecker(diag(2), rep(1,N)) # design matrix for contrast (sum) coding

fit1_LMEr <- lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML")
fit0_LMEr <- lme(fixed=extra ~ 1,     random=~1 | ID, sleep, method="ML")
exp((BIC(fit0_LMEr)-BIC(fit1_LMEr))/2) # Approx. BF10


## Bayesian t-Test
ttestBF(resp_diff)


## Bayesian RMANOVA
set.seed(277)
(fit_anovaBF <- anovaBF(extra ~ group + ID, data=sleep, whichRandom="ID", progress=F))
set.seed(277)
chains <- posterior(fit_anovaBF, iterations=1e5, progress=F) # sample from the posterior distribution
summary(chains)
model.matrix(fit_anovaBF) # design matrices X[,-1]·Q and Z


## Integrated Nested Laplace Approximation (INLA)
fit1_INLA  <- inla(extra ~ 1 + group + f(ID, model="iid"), 
                   family="gaussian", data=sleep)
inla.set.control.fixed.default(fit1_INLA)[c("mean.intercept", "prec.intercept", "mean", "prec")] #default priors
summary(fit1_INLA)

fit0_INLA  <- inla(extra ~ 1 + f(ID, model="iid"), 
                   family="gaussian", data=sleep)
summary(fit0_INLA)

bf10 <- exp(fit1_INLA$mlik - fit0_INLA$mlik)
rownames(bf10) <- c("integration", "Gaussian"); colnames(bf10) <- "BF10.est"
bf10

## Maximal Set of Random Effects (MRE) Model
# formula <- extra ~ 1 + group + f(ID, model="iid") + f(ID:group, model="iid")

# Change the hyperpriors for the precisions of the random effects and error
shape_subj <- rate_subj <- shape_err <- rate_err <- 0.0001
fit1r_INLA  <- inla(extra ~ 1 + group + f(ID, model="iid",
                                          hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
                    control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
                    family="gaussian", data=sleep)
summary(fit1r_INLA)

fit0r_INLA  <- inla(extra ~ 1 + f(ID, model="iid",
                                  hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
                    control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
                    family="gaussian", data=sleep)
summary(fit0r_INLA)

bf10r <- exp(fit1r_INLA$mlik - fit0r_INLA$mlik)
rownames(bf10r) <- c("integration", "Gaussian"); colnames(bf10r) <- "BF10.est"
bf10r


## Stan
computeQ <- function(C) {
  #' reduced parametrization for the fixed treatment effects:
  #' a C x (C-1) matrix with orthonormal columns spanning the
  #' sum-to-zero contrast space (Rouder et al., 2012)
  S <- diag(C) - matrix(1, nrow = C, ncol = C) / C # centering matrix, rank C-1
  e <- qr(S)                                       # QR decomposition of S
  Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))       # fix the column signs
  Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]    # keep the C-1 non-null columns
}
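
# Illustrative check (not in the original script): with C = 2 conditions,
# computeQ(2) returns a single sum-to-zero contrast column, proportional
# to (1, -1) up to sign, with unit length.
computeQ(2)
crossprod(computeQ(2)) # = 1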

dat <- cbind(group1, group2) #wide format

stancodeM1 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [old syntax, removed in newer Stan]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  matrix[C,C-1] Q;       // projecting C fixed effects into C-1
  real<lower=0> ht;      // (square root) prior scale on the variability of the standardized treatment effects
  real<lower=0> hb;      // (square root) prior scale on the variability of the standardized subject-specific random effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma;  // standard deviation of the error
  real<lower=0> gt;     // variance of the standardized projected fixed treatment effects
  real<lower=0> gb;     // variance of the standardized subject-specific random effects
  vector[C-1] tf;       // projected fixed treatment effects
  vector[N] b;          // subject-specific random effects
}

transformed parameters {
  real<lower=0> eta;    // sd of the projected fixed treatment effects
  real<lower=0> tau;    // sd of the subject-specific random effects
  vector[C] t;          // fixed treatment effects
  eta = sigma * sqrt(gt);
  tau = sigma * sqrt(gb);
  t = Q * tf;
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);                               // Jeffreys prior
  target += scaled_inv_chi_square_lpdf(gt | 1, ht);
  target += scaled_inv_chi_square_lpdf(gb | 1, hb);    // Rouder et al. (2012)
  target += normal_lpdf(tf | 0, eta);
  target += normal_lpdf(b | 0, tau);
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + t + b[i], sigma);
  }
}
"

stanmodelM1 <- stan_model(model_code=stancodeM1)
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
                   "Q"=computeQ(ncol(dat)), "ht"=.5, "hb"=1)
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]]) # uniform-prior bounds on mu:
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]]) # sample mean -/+ 6 SD
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM1)[[1]]


stancodeM0 <- "
data {
  int<lower=1> N;        // number of subjects
  int<lower=2> C;        // number of conditions
  // vector[C] Y[N];        // responses [old syntax, removed in newer Stan]
  array[N] vector[C] Y;  // responses [Stan 2.26+ syntax for array declarations]
  real<lower=0> hb;      // (square root) prior scale on the variability of the standardized subject-specific random effects
  real lwr;              // lower bound for the grand mean
  real upr;              // upper bound for the grand mean
}

parameters {
  real mu;              // grand mean
  real<lower=0> sigma;  // standard deviation of the error
  real<lower=0> gb;     // variance of the standardized subject-specific random effects
  vector[N] b;          // subject-specific random effects
}

transformed parameters {
  real<lower=0> tau;    // sd of the subject-specific random effects
  tau = sigma * sqrt(gb);
}

model {
  target += uniform_lpdf(mu | lwr, upr);
  target += -log(sigma);                               // Jeffreys prior
  target += scaled_inv_chi_square_lpdf(gb | 1, hb);    // Rouder et al. (2012)
  target += normal_lpdf(b | 0, tau);
  for (i in 1:N) {
    target += normal_lpdf(Y[i] | mu + b[i], sigma);
  }
}
"

stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)], # drop Q and ht, unused by M0
                      iter=50000, warmup=10000, chains=4, seed=277, refresh=0)

rstan::summary(stanfitM0)[[1]]


M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)

M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)

bf(M1, M0)

 

scroll to top