PREVIOUS: https://rpubs.com/sherloconan/1151407
THIS: https://rpubs.com/sherloconan/1184363
MORE: https://github.com/zhengxiaoUVic/Bayesian
Model (for Aggregated Data, i.e., without the Highest-Order Repeated-Measures Interaction)
\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ik}&=\mu+s_k+\alpha_i+\epsilon_{ik}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ik}&=\mu+s_k+\epsilon_{ik}, \\ \epsilon_{ik}\overset{\text{i.i.d.}}{\sim}&\mathcal{N}(0,\sigma_\epsilon^2)\qquad\text{for subject } k=1,\dotsb,n,\text{ and condition } i=1,\dotsb,a. \end{align*}\]
A general matrix-vector model expression is given in Eq. 13 of the “R-INLA” section.
Data
sleep: the increase in sleep (in hours) after taking each of two drugs, compared with regular nights on which no drug was administered.
A data frame with 20 observations on three variables.
extra [numeric] increase in hours of sleep
group [factor] drug given (\(a=2\))
ID [factor] patient ID (\(n=10\) subjects)
group1 <- sleep$extra[1:10]; group2 <- sleep$extra[11:20]; resp_diff <- group1 - group2; N <- length(resp_diff)
ggplot(sleep, aes(x=group, y=extra)) +
geom_line(aes(group=ID), linewidth=.25) + # plot individual data
labs(x="Condition", y="Response (hour)") + theme_classic()
Analyses
\(p\)-value = 0.002833 and \(\mathit{BF}_{10}\) estimates:

| Method | \(\mathit{BF}_{10}\) | Equations |
|---|---|---|
| ttestBF | 17.25888 | Eq. 2-4 |
| anovaBF | 11.4676 | Eq. 1 & 6-12 |
| ttest.tstat (approx.) | 17.25888 | Eq. 5 |
| BIC (approx.) | 40.83649 | Eq. 16 |
| bridge_sampler | \(\exp\left\{-42.47931-(-44.93969)\right\}\approx11.70927\) | Eq. 1 & 6-12 |
| inla | \(\exp\left\{-59.42-(-60.40)\right\}\approx2.661885\) | Eq. 13-14 |
Note the impact of different model specifications (priors).
# t.test(resp_diff)
# t.test(extra ~ group, data=sleep, paired=T) # deprecated in R 4.4.0
(fit_t <- t.test(group1, group2, paired=T))
##
## Paired t-test
##
## data: group1 and group2
## t = -4.0621, df = 9, p-value = 0.002833
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -2.4598858 -0.7001142
## sample estimates:
## mean difference
## -1.58
BayesFactor::ttest.tstat(t=fit_t$statistic, n1=N, simple=T) # convert t-statistic to BF10
## B10
## 17.25888
Note: when simple=F (the default), the function returns \(\ln\mathit{BF}_{10}\) as the bf element of a list.
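A quick check (a sketch, assuming fit_t from above): exponentiating the bf element recovers the simple=T value.
exp(BayesFactor::ttest.tstat(t=fit_t$statistic, n1=N)$bf) # 17.25888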
shapiro.test(resp_diff) # check the normality assumption
##
## Shapiro-Wilk normality test
##
## data: resp_diff
## W = 0.82987, p-value = 0.03334
wilcox.test(resp_diff, mu=0, alternative="two.sided")
## Warning in wilcox.test.default(resp_diff, mu = 0, alternative = "two.sided"):
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(resp_diff, mu = 0, alternative = "two.sided"):
## cannot compute exact p-value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: resp_diff
## V = 0, p-value = 0.009091
## alternative hypothesis: true location is not equal to 0
The Wilcoxon signed-rank test is used to determine whether the median of a single sample differs significantly from a specified value. It is especially useful when the data do not meet the assumptions of the \(t\)-test, such as normality.
n_perm <- 2000 # number of permutations
mean_diff <- numeric(n_perm)
for (i in 1:n_perm) {
# randomly switch the group labels and then compute the mean difference
set.seed(i)
mean_diff[i] <- mean(resp_diff * sample(c(-1,1), N, replace=T))
}
hist(mean_diff, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
main=paste0(n_perm," Permutations of Raw Data"))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff), col="red", lwd=2)
sum(abs(mean_diff) >= abs(mean(resp_diff))) / n_perm # estimated p-value
## [1] 0.003
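Because this estimate is itself a Monte Carlo quantity, its standard error is roughly \(\sqrt{\hat{p}(1-\hat{p})/n_{\text{perm}}}\); a quick check, reusing the objects above:
p_hat <- sum(abs(mean_diff) >= abs(mean(resp_diff))) / n_perm
sqrt(p_hat * (1 - p_hat) / n_perm) # about 0.0012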
mean_diff_exact <- colMeans(apply(expand.grid(rep(list(c(-1, 1)), N)),
1, function(signs) resp_diff * signs))
hist(mean_diff_exact, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
main=substitute(2^exp * " Unique Permutations of Raw Data", list(exp=N)))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff_exact), col="red", lwd=2)
sum(abs(mean_diff_exact) >= abs(mean(resp_diff))) / (2^N) # exact p-value
## [1] 0.00390625
Further reading:
May, R. B., Masson, M. E. J., & Hunter, M. A. (1989). Randomization tests: Viable alternatives to normal curve tests. Behavior Research Methods, Instruments & Computers, 21, 482–483. https://doi.org/10.3758/BF03202823
In Eq. 1, \(\sum_{i=1}^a\alpha_i=0\) and \(s_k\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_s^2)\).
A specific form of two-factor ANOVA, where one factor is the subjects (random effect) and the other is the repeated measure (fixed effect), can be equivalent to one-way RMANOVA. They are also modeled and coded similarly to a one-way ANOVA with blocking in a randomized complete block design, though their interpretations may differ.
Nuisance factor: It affects the response, but we are not interested in it. For example, batches of raw materials. Sometimes, several sources of variability are combined in a block, so the block becomes an aggregate variable (Montgomery, 2019, ch. 4).
- unknown and uncontrollable ("lurking"): randomization
- known and uncontrollable: analysis of covariance (ANCOVA)
- known and controllable: blocking
(fit_RM <- summary(aov(extra ~ group + Error(ID/group), sleep))) # one-way RMANOVA
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 9 58.08 6.453
##
## Error: ID:group
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 12.482 12.482 16.5 0.00283 **
## Residuals 9 6.808 0.756
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(extra ~ group + ID, sleep)) # two-factor ANOVA
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 12.48 12.482 16.501 0.00283 **
## ID 9 58.08 6.453 8.531 0.00190 **
## Residuals 9 6.81 0.756
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ez::ezANOVA(sleep, dv=extra, wid=ID, within=.(group), type=2, detailed=T, return_aov=T)
## $ANOVA
## Effect DFn DFd SSn SSd F p p<.05 ges
## 1 (Intercept) 1 9 47.432 58.078 7.350253 0.02395294 * 0.4223010
## 2 group 1 9 12.482 6.808 16.500881 0.00283289 * 0.1613329
##
## $aov
##
## Call:
## aov(formula = formula(aov_formula), data = data)
##
## Grand Mean: 1.54
##
## Stratum 1: ID
##
## Terms:
## Residuals
## Sum of Squares 58.078
## Deg. of Freedom 9
##
## Residual standard error: 2.540297
##
## Stratum 2: ID:group
##
## Terms:
## group Residuals
## Sum of Squares 12.482 6.808
## Deg. of Freedom 1 9
##
## Residual standard error: 0.8697381
## Estimated effects are balanced
fit_t$statistic^2 == unlist(fit_RM)["Error: ID:group.F value1"]
## t
## TRUE
\(\Longrightarrow\quad t^2=F\), and this result also holds for the (possibly unbalanced) two-sample \(t\)-test with equal variances \(\underline{\text{and}}\) the one-way ANOVA with two conditions.
## NOTE: the toy data set and two-sample t-tests with equal variances
ttest_result <- t.test(extra ~ group, sleep, var.equal=T)
anova_result <- summary(aov(extra ~ group, sleep)) #balanced, n=10
options(digits=10)
ttest_result$statistic^2 #3.462626761
anova_result[[1]][["F value"]][1] #3.462626761
ttest_result <- t.test(extra ~ group, sleep[-5,], var.equal=T)
anova_result <- summary(aov(extra ~ group, sleep[-5,])) #unbalanced
ttest_result$statistic^2 #2.773136741
anova_result[[1]][["F value"]][1] #2.773136741
friedman.test(extra ~ group | ID, sleep)
##
## Friedman rank sum test
##
## data: extra and group and ID
## Friedman chi-squared = 9, df = 1, p-value = 0.0027
Linear Mixed-Effects Regression (LMER) in Sum Coding (i.e., each level is contrasted with the grand mean)
\[\begin{equation*} \tag{1'} \mathbf{y}=\mu\mathbf{1}+\mathbf{Z}_s\boldsymbol{s}+\mathbf{X}_{\alpha}\boldsymbol{\alpha}+\boldsymbol{\epsilon},\quad\boldsymbol{\epsilon}\sim\boldsymbol{\mathcal{N}}_{an}(\boldsymbol{0},\ \sigma_\epsilon^2\!\cdot\!\mathbf{I}) \end{equation*}\]
\(\mathbf{Z}_s=\underset{\tiny\begin{array}{r} \text{Kronecker} \\ \text{product} \end{array}}{\mathbf{1}_{a}\otimes\mathbf{I}_n}\), \(\quad\ \mathbf{X}_{\alpha}=\mathbf{I}_a\otimes\mathbf{1}_{n}\),
\(Y_{ik}\), the response for subject \(k\) under condition \(i\). Note that changing the order of subscripts (\(Y_{ki}\)) requires adjusting the matrix operations.
In sum coding, \(\widehat{Y}_i=1.54+0.79\times\mathrm{Condition}_i\) with \(\mathrm{Condition}_1=-1\) and \(\mathrm{Condition}_2=1\).
In dummy coding, \(\widehat{Y}_i=0.75+1.58\times\mathrm{Condition}_i\) with \(\mathrm{Condition}_1=0\) and \(\mathrm{Condition}_2=1\).
In dummy coding, each level of a categorical variable (except the reference level) is represented by a separate binary (0/1) variable.
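As a minimal check of the two codings above (a sketch; a plain lm suffices for the point estimates, ignoring the repeated measures):
coef(lm(extra ~ group, sleep)) # dummy coding: 0.75 and 1.58
coef(lm(extra ~ group, sleep, contrasts=list(group=contr.sum))) # sum coding: 1.54 and -0.79 (here Condition_1 is coded +1, flipping the sign)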
# Random intercept for each subject
summary(lme4::lmer(extra ~ group + (1 | ID), sleep, REML=F))
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: extra ~ group + (1 | ID)
## Data: sleep
##
## AIC BIC logLik -2*log(L) df.resid
## 78.5 82.5 -35.3 70.5 16
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.72210 -0.36005 0.03527 0.33215 1.93804
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 2.5635 1.6011
## Residual 0.6808 0.8251
## Number of obs: 20, groups: ID, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.7500 0.5696 1.317
## group2 1.5800 0.3690 4.282
##
## Correlation of Fixed Effects:
## (Intr)
## group2 -0.324
summary(nlme::lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML"))
## Linear mixed-effects model fit by maximum likelihood
## Data: sleep
## AIC BIC logLik
## 78.50469 82.48762 -35.25235
##
## Random effects:
## Formula: ~1 | ID
## (Intercept) Residual
## StdDev: 1.601093 0.8251061
##
## Fixed effects: extra ~ group
## Value Std.Error DF t-value p-value
## (Intercept) 0.75 0.6003980 9 1.249171 0.2431
## group2 1.58 0.3889587 9 4.062127 0.0028
## Correlation:
## (Intr)
## group2 -0.324
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.72209551 -0.36004711 0.03527154 0.33215115 1.93803983
##
## Number of Observations: 20
## Number of Groups: 10
Approximating Bayes Factors from Bayesian Information Criterion (BIC)
\[\begin{equation} \tag{16} \mathit{BF}_{10}\approx\exp\left\{\frac{\text{BIC}(\mathcal{M}_0)-\text{BIC}(\mathcal{M}_1)}{2}\right\} \end{equation}\]
Derivation in Wagenmakers (2007, Appendix B).
Discussion in Nathoo and Masson (2016).
fit1_LME <- lme4::lmer(extra ~ group + (1 | ID), sleep, REML=F)
fit0_LME <- lme4::lmer(extra ~ (1 | ID), sleep, REML=F)
exp((BIC(fit0_LME) - BIC(fit1_LME)) / 2) # Approx. BF10
## [1] 40.83649
fit1_LMEr <- nlme::lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML")
fit0_LMEr <- nlme::lme(fixed=extra ~ 1, random=~1 | ID, sleep, method="ML")
exp((BIC(fit0_LMEr) - BIC(fit1_LMEr)) / 2) # Approx. BF10
## [1] 40.83649
It remains to be investigated whether these BIC values can be plugged into Eq. 16 directly.
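For reference, R computes BIC here as \(-2\ln L+k\ln(an)\) with all \(an=20\) observations counted; whether that is the appropriate effective sample size for a mixed model is part of the open question. A sketch, assuming fit1_LME from above:
k <- attr(logLik(fit1_LME), "df") # number of estimated parameters (4)
-2*as.numeric(logLik(fit1_LME)) + k*log(nobs(fit1_LME)) # matches BIC(fit1_LME)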
getME(fit1_LME, "Z") # random-effects model matrix Z
## 20 x 10 sparse Matrix of class "dgCMatrix"
## [[ suppressing 10 column names '1', '2', '3' ... ]]
##
## 1 1 . . . . . . . . .
## 2 . 1 . . . . . . . .
## 3 . . 1 . . . . . . .
## 4 . . . 1 . . . . . .
## 5 . . . . 1 . . . . .
## 6 . . . . . 1 . . . .
## 7 . . . . . . 1 . . .
## 8 . . . . . . . 1 . .
## 9 . . . . . . . . 1 .
## 10 . . . . . . . . . 1
## 11 1 . . . . . . . . .
## 12 . 1 . . . . . . . .
## 13 . . 1 . . . . . . .
## 14 . . . 1 . . . . . .
## 15 . . . . 1 . . . . .
## 16 . . . . . 1 . . . .
## 17 . . . . . . 1 . . .
## 18 . . . . . . . 1 . .
## 19 . . . . . . . . 1 .
## 20 . . . . . . . . . 1
kronecker(rep(1,2), diag(N)) # random-effects model matrix Z
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1 0 0 0 0 0 0 0 0 0
## [2,] 0 1 0 0 0 0 0 0 0 0
## [3,] 0 0 1 0 0 0 0 0 0 0
## [4,] 0 0 0 1 0 0 0 0 0 0
## [5,] 0 0 0 0 1 0 0 0 0 0
## [6,] 0 0 0 0 0 1 0 0 0 0
## [7,] 0 0 0 0 0 0 1 0 0 0
## [8,] 0 0 0 0 0 0 0 1 0 0
## [9,] 0 0 0 0 0 0 0 0 1 0
## [10,] 0 0 0 0 0 0 0 0 0 1
## [11,] 1 0 0 0 0 0 0 0 0 0
## [12,] 0 1 0 0 0 0 0 0 0 0
## [13,] 0 0 1 0 0 0 0 0 0 0
## [14,] 0 0 0 1 0 0 0 0 0 0
## [15,] 0 0 0 0 1 0 0 0 0 0
## [16,] 0 0 0 0 0 1 0 0 0 0
## [17,] 0 0 0 0 0 0 1 0 0 0
## [18,] 0 0 0 0 0 0 0 1 0 0
## [19,] 0 0 0 0 0 0 0 0 1 0
## [20,] 0 0 0 0 0 0 0 0 0 1
model.matrix(fit1_LME) # design matrix for dummy (one-hot) coding
## (Intercept) group2
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
## 7 1 0
## 8 1 0
## 9 1 0
## 10 1 0
## 11 1 1
## 12 1 1
## 13 1 1
## 14 1 1
## 15 1 1
## 16 1 1
## 17 1 1
## 18 1 1
## 19 1 1
## 20 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
##
## attr(,"msgScaleX")
## character(0)
kronecker(diag(2), rep(1,N)) # design matrix for contrast (sum) coding
## [,1] [,2]
## [1,] 1 0
## [2,] 1 0
## [3,] 1 0
## [4,] 1 0
## [5,] 1 0
## [6,] 1 0
## [7,] 1 0
## [8,] 1 0
## [9,] 1 0
## [10,] 1 0
## [11,] 0 1
## [12,] 0 1
## [13,] 0 1
## [14,] 0 1
## [15,] 0 1
## [16,] 0 1
## [17,] 0 1
## [18,] 0 1
## [19,] 0 1
## [20,] 0 1
Bayesian \(t\)-Test
\[\begin{equation*} \tag{2} y_i\overset{\text{i.i.d.}}{\sim}\mathcal{N}(\sigma_\epsilon \delta,\ \sigma_\epsilon^2)\quad\text{for }i=1,\dotsb,N,\text{ where }\delta=\mu\ /\ \sigma_\epsilon \end{equation*}\]
\[\begin{equation*} \tag{3} \pi(\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]
\[\begin{equation*} \tag{4} \delta\sim\text{Cauchy}(0,\ h)=\frac{h}{(\delta^2+h^2)\pi} \end{equation*}\]
The Jeffreys-Zellner-Siow (JZS) Bayes factor for the Bayesian \(t\)-test, \(\mathcal{H}_0:\ \delta=0\) versus \(\mathcal{H}_1:\ \delta\neq 0\), is computed by integrating across one dimension in Eq. 5 (Ly et al., 2016, p. 24; Rouder et al., 2012, p. 360; Rouder et al., 2009, p. 237; Wei, Nathoo, & Masson, 2023, p. 1762; see also the balanced one-way between-subjects ANOVA in Morey et al., 2011, p. 374), using Gaussian quadrature.
\[\begin{equation*} \tag{5} \textit{JZS-BF}_{10}=(2\pi)^{-\frac{1}{2}}h\left(1+\frac{t^2}{\nu}\right)^{\frac{\nu+1}{2}}\int_0^\infty(1+Ng)^{-\frac{1}{2}}\left(1+\frac{t^2}{(1+Ng)\nu}\right)^{-\frac{\nu+1}{2}}g^{-\frac{3}{2}}e^{-\frac{h^2}{2g}}\text{d}g, \end{equation*}\]
where the default scale \(h=\sqrt{2}/2\) (version 0.9.2 and later), the degrees of freedom \(\nu=N-1\), \(\quad t=\bar{y}\sqrt{N}/s_y\), \(\quad\bar{y}\) is the sample mean, and \(s_y\) is the sample standard deviation in a one-sample \(t\)-test.
Unlike the anovaBF function, the ttestBF function of the “BayesFactor” R package yields negligible Monte Carlo errors and does not require specifying the number of Markov chain Monte Carlo iterations.
BayesFactor::ttestBF(resp_diff)
## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 17.25888 ±0%
##
## Against denominator:
## Null, mu = 0
## ---
## Bayes factor type: BFoneSample, JZS
JZSBF10 <- function(h=sqrt(2)/2, t, N) {
#' Input -
#' h: the scale parameter of the Cauchy prior
#' t: the observed t test statistic
#' N: the sample size
#' Output - the JZS Bayes factor for a one-sample t-test
nu <- N - 1
coef <- (2*pi)^(-0.5) * h * (1+t^2/nu)^((nu+1)/2)
integrand <- function(g) {
(1+N*g)^(-0.5) * (1+t^2/((1+N*g)*nu))^(-(nu+1)/2) * g^(-1.5) * exp(-h^2/(2*g))
}
coef * integrate(integrand, lower=0, upper=Inf)$value
}
N <- 20 #sample size
t_obs <- seq(0,5,by=0.2) #observed t test statistics
NSim <- length(t_obs)
pVal <- BF10 <- numeric(NSim)
for (i in 1:NSim) {
pVal[i] <- pt(t_obs[i], df=N-1, lower.tail=F)*2
BF10[i] <- JZSBF10(t=t_obs[i], N=N)
}
par(mfrow=c(1,2))
plot(pVal, 1/BF10,
main="The Theoretical Relationship",
sub=bquote(italic(N) == .(N)),
xlab=expression(italic("p")~"-Value"),
ylab=expression(italic("BF")["01"])) #closely match the empirical results
abline(v=.05, col="red")
abline(h=c(1/3,3), lty=2)
plot(t_obs, BF10,
main="The Theoretical Relationship",
sub=bquote(italic(N) == .(N)),
xlab=expression("|"~italic("t")~"|-Statistic"),
ylab=expression(italic("BF")[10]))
\[ \begin{align*} &\lim_{N\to\infty}\textit{JZS-BF}_{10}\left(t(N),\ N\right)= \begin{cases} 0, & \text{if $\delta=0$} \\ \infty, & \text{if $\delta\neq0$} \end{cases} \\ \\ &\lim_{t\to\infty}\textit{JZS-BF}_{10}\left(t(N),\ N\right)=\infty,\quad\forall N \end{align*} \]
The resulting \(\textit{JZS-BF}_{10}\) avoids two critical issues:
- Bartlett's paradox: the Bayes factor approaches zero as the prior variance increases.
- Information paradox: the Bayes factor tends to be bounded, even given overwhelming information.
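A quick illustration of the latter, reusing the JZSBF10 function above: the Bayes factor keeps growing with \(|t|\) rather than plateauing.
sapply(c(5, 10, 20), function(t_obs) JZSBF10(t=t_obs, N=20)) # strictly increasing, unbounded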
Bayesian RMANOVA
Scaling \(\alpha_i=\sigma_\epsilon\cdot t_i\) and \(s_k=\sigma_\epsilon\cdot b_k\).
\[\begin{equation*} \tag{6} \pi(\mu,\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]
\[\begin{equation*} \tag{7} t_i^{\star}\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation*}\]
\[\begin{equation*} \tag{8} b_k\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation*}\]
\[\begin{equation*} \tag{9} (t_1^{\star},\dotsb,t_{a-1}^{\star})=(t_1,\dotsb,t_{a})\cdot\mathbf{Q} \end{equation*}\]
\[\begin{equation*} \tag{10} \mathbf{I}_a-a^{-1}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q^{\top}}, \end{equation*}\]
where \(\mathbf{Q}\) is an \(a\times (a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of Eq. 10. For example, \(t^{\star}=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^{\star}/\sqrt{2}\) and \(t_2=-t^{\star}/\sqrt{2}\).
\[\begin{equation*} \tag{11} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation*}\]
\[\begin{equation*} \tag{12} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation*}\]
By default, \(h=0.5\) for the fixed effects and \(h_b=1\) for the random effects.
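A minimal numeric check of Eq. 9-10 for \(a=2\), building \(\mathbf{Q}\) from the eigendecomposition of the centering matrix:
a <- 2
S <- diag(a) - matrix(1, a, a)/a # left-hand side of Eq. 10
Q <- eigen(S)$vectors[, seq_len(a-1), drop=FALSE] # unit eigenvectors of the nonzero eigenvalues
Q # columns are ±(1,-1)/sqrt(2), so t* = ±(t1-t2)/sqrt(2)
all.equal(S, Q %*% t(Q)) # TRUE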
Default functions in “BayesFactor” version 0.9.2 and later return the same Bayes factor estimates for the independent-samples \(t\)-test and one-way ANOVA with two conditions. However, those functions return systematically different estimates for the paired \(t\)-test and one-way RMANOVA with two conditions (see discussion and vignettes).
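In anovaBF, these scales are the rscaleFixed and rscaleRandom arguments; the defaults ("medium" = 0.5 and "nuisance" = 1) can be made explicit, equivalent to the default call below:
BayesFactor::anovaBF(extra ~ group + ID, data=sleep, whichRandom="ID", rscaleFixed=0.5, rscaleRandom=1, progress=F)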
set.seed(277)
(fit_anovaBF <- BayesFactor::anovaBF(extra ~ group + ID, data=sleep, whichRandom="ID", progress=F))
## Bayes factor analysis
## --------------
## [1] group + ID : 11.4676 ±0.68%
##
## Against denominator:
## extra ~ ID
## ---
## Bayes factor type: BFlinearModel, JZS
set.seed(277)
chains <- BayesFactor::posterior(fit_anovaBF, iterations=1e5, progress=F)
summary(chains) # sample from the posterior distribution
##
## Iterations = 1:1e+05
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mu 1.5377 0.6045 0.0019116 0.001912
## group-1 -0.6719 0.2473 0.0007822 0.001045
## group-2 0.6719 0.2473 0.0007822 0.001045
## ID-1 -0.1900 0.8426 0.0026645 0.002678
## ID-2 -1.5586 0.8735 0.0027622 0.003546
## ID-3 -0.8780 0.8496 0.0026868 0.002872
## ID-4 -1.6788 0.8822 0.0027897 0.003723
## ID-5 -1.3207 0.8640 0.0027323 0.003252
## ID-6 1.8972 0.8953 0.0028311 0.003926
## ID-7 2.4646 0.9274 0.0029326 0.004672
## ID-8 -0.2691 0.8395 0.0026548 0.002679
## ID-9 0.6162 0.8426 0.0026645 0.002729
## ID-10 0.9319 0.8542 0.0027014 0.002966
## sig2 1.1762 0.6651 0.0021033 0.005588
## g_group 8.2156 250.8629 0.7932981 0.864024
## g_ID 3.4267 3.1089 0.0098311 0.021266
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mu 0.32371 1.16217 1.5378 1.9145 2.74944
## group-1 -1.14470 -0.83322 -0.6799 -0.5160 -0.16008
## group-2 0.16008 0.51603 0.6799 0.8332 1.14470
## ID-1 -1.86264 -0.73402 -0.1856 0.3547 1.46986
## ID-2 -3.29995 -2.12177 -1.5567 -0.9892 0.17148
## ID-3 -2.57498 -1.42979 -0.8744 -0.3256 0.79468
## ID-4 -3.42780 -2.25151 -1.6776 -1.1035 0.04571
## ID-5 -3.04137 -1.88045 -1.3184 -0.7577 0.38116
## ID-6 0.13118 1.31545 1.8961 2.4733 3.67944
## ID-7 0.60364 1.86403 2.4774 3.0706 4.27811
## ID-8 -1.93423 -0.80958 -0.2693 0.2701 1.40519
## ID-9 -1.04457 0.07187 0.6094 1.1592 2.29900
## ID-10 -0.74731 0.37742 0.9293 1.4818 2.63244
## sig2 0.41972 0.72660 1.0076 1.4362 2.91251
## g_group 0.08898 0.36931 0.8654 2.2938 27.20602
## g_ID 0.43536 1.46264 2.5827 4.3739 11.47310
BayesFactor::model.matrix(fit_anovaBF) # design matrices X[,-1]·Q and Z
## group_redu_1 ID-1 ID-2 ID-3 ID-4 ID-5 ID-6 ID-7 ID-8 ID-9 ID-10
## 1 1 -0.7071068 1 0 0 0 0 0 0 0 0 0
## 2 1 -0.7071068 0 1 0 0 0 0 0 0 0 0
## 3 1 -0.7071068 0 0 1 0 0 0 0 0 0 0
## 4 1 -0.7071068 0 0 0 1 0 0 0 0 0 0
## 5 1 -0.7071068 0 0 0 0 1 0 0 0 0 0
## 6 1 -0.7071068 0 0 0 0 0 1 0 0 0 0
## 7 1 -0.7071068 0 0 0 0 0 0 1 0 0 0
## 8 1 -0.7071068 0 0 0 0 0 0 0 1 0 0
## 9 1 -0.7071068 0 0 0 0 0 0 0 0 1 0
## 10 1 -0.7071068 0 0 0 0 0 0 0 0 0 1
## 11 1 0.7071068 1 0 0 0 0 0 0 0 0 0
## 12 1 0.7071068 0 1 0 0 0 0 0 0 0 0
## 13 1 0.7071068 0 0 1 0 0 0 0 0 0 0
## 14 1 0.7071068 0 0 0 1 0 0 0 0 0 0
## 15 1 0.7071068 0 0 0 0 1 0 0 0 0 0
## 16 1 0.7071068 0 0 0 0 0 1 0 0 0 0
## 17 1 0.7071068 0 0 0 0 0 0 1 0 0 0
## 18 1 0.7071068 0 0 0 0 0 0 0 1 0 0
## 19 1 0.7071068 0 0 0 0 0 0 0 0 1 0
## 20 1 0.7071068 0 0 0 0 0 0 0 0 0 1
## attr(,"gMap")
## intercept group ID ID ID ID ID ID
## NA 0 1 1 1 1 1 1
## ID ID ID ID
## 1 1 1 1
Integrated Nested Laplace Approximation (INLA)
Gaussian Process (GP) Regression Model
\[\begin{align*} \tag{13} \mathbf{y}&=\mathbf{X}\boldsymbol{\beta}+\mathbf{Z}\mathbf{b}+\boldsymbol{\epsilon} \\ \mathbf{y}_k&=\mathbf{X}_k\boldsymbol{\beta}+\mathbf{Z}_k\mathbf{b}_k+\boldsymbol{\epsilon}_k \\ Y_{ik}&=\underset{\tiny\color{gray}{\begin{array}{r} \text{fixed} \\ \text{effects} \end{array}}}{\mathbf{x}_{ik}^\top\boldsymbol{\beta}}+\underset{\tiny\color{gray}{\begin{array}{r} \text{random} \\ \text{effects} \end{array}}}{\mathbf{z}_{ik}^\top\mathbf{b}_k}+\epsilon_{ik},\quad\epsilon_{ik}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\ \sigma_\epsilon^2) \\ &\qquad\text{for }i=1,\dotsb,a;\quad k=1,\dotsb,n \end{align*}\]
where \(\mathbf{b}\) is a latent process capturing the dependence. See f() in the INLA formula.
\(\color{lightgray}{\underset{an\times1}{\mathbf{y}}\equiv(\mathbf{y}_1^\top,\dotsb,\mathbf{y}_n^\top)^\top\equiv\big((Y_{11},\dotsb,Y_{a1}),\dotsb,(Y_{1n},\dotsb,Y_{an})\big)^\top}\)
\(\color{lightgray}{\underset{an\times (a+1)}{\hspace{-1.4em}\mathbf{X}}\hspace{-0.9em}\equiv(\mathbf{X}_1^\top,\dotsb,\mathbf{X}_n^\top)^\top\equiv\big((\underset{(a+1)\times1}{\mathbf{x}_{11}},\dotsb,\mathbf{x}_{a1}),\dotsb,(\mathbf{x}_{1n},\dotsb,\mathbf{x}_{an})\big)^\top}\)
\(\color{lightgray}{\underset{an\times nq}{\hspace{-0.6em}\mathbf{Z}}\hspace{-0.9em}\equiv\text{diag}(\mathbf{Z}_1,\dotsb,\mathbf{Z}_n)\equiv\text{diag}\big((\underset{q\times1}{\mathbf{z}_{11}},\dotsb,\mathbf{z}_{a1})^\top,\dotsb,(\mathbf{z}_{1n},\dotsb,\mathbf{z}_{an})^\top\big)}\)
\(\color{lightgray}{\underset{nq\times1}{\mathbf{b}}\equiv(\mathbf{b}_1^\top,\dotsb,\mathbf{b}_n^\top)^\top\equiv\big((b_{11},\dotsb,b_{1q}),\dotsb,(b_{n1},\dotsb,b_{nq})\big)^\top\qquad\qquad\underset{(a+1)\times1}{\boldsymbol{\beta}}\equiv(\beta_0,\dotsb,\beta_{a})^\top}\)
Latent GP \[\begin{equation*} \boldsymbol{\mathcal{N}}_{an}\left(\mathbf{y}\mid\mathbf{X}\boldsymbol{\beta}+\mathbf{Z}\mathbf{b},\ \sigma_\epsilon^2\!\cdot\!\mathbf{I}\right)\ \times\ \boldsymbol{\mathcal{N}}_{nq}\left(\mathbf{b}\mid\mathbf{0},\ \mathbf{G}\right)\ \times\ \pi(\sigma_\epsilon^2,\ \mathbf{G}) \end{equation*}\]
Response GP \[\begin{equation*} \boldsymbol{\mathcal{N}}_{an}\left(\mathbf{y}\mid\mathbf{X}\boldsymbol{\beta},\ \mathbf{ZGZ}^\top+\sigma_\epsilon^2\!\cdot\!\mathbf{I}\right)\times\ \pi(\sigma_\epsilon^2,\ \mathbf{G}) \end{equation*}\]
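For a single random intercept per subject (\(q=1\), \(\mathbf{Z}_k=\mathbf{1}_a\)), marginalizing \(\mathbf{b}\) in the Response GP gives a compound-symmetric within-subject covariance \(g_1\mathbf{J}_a+\sigma_\epsilon^2\mathbf{I}_a\). A minimal sketch with hypothetical variance components near the lmer estimates above:
a <- 2; g1 <- 2.56; sigma2 <- 0.68 # hypothetical values
matrix(g1, a, a) + sigma2*diag(a) # Z_k G Z_k^T + sigma_e^2 I for one subject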
iid: Independent and identically distributed Gaussian random effect.
\[\begin{equation*} \tag{14} \mathbf{b}\mid\mathbf{G}\sim\boldsymbol{\mathcal{N}}_{nq}\left(\mathbf{0},\ \mathbf{G}=\text{diag}(g_1,\dotsb,g_q,\dotsb,g_1,\dotsb,g_q)\right)\ \perp \!\!\! \perp\ \boldsymbol{\epsilon}\mid\sigma_\epsilon^2\sim\boldsymbol{\mathcal{N}}_{an}\left(\mathbf{0},\ \sigma_\epsilon^2\!\cdot\!\mathbf{I}\right) \end{equation*}\]
By default, the intercept \(\beta_0\sim\mathcal{N}(0,\ 0^{-1})\) and the rest of the fixed effects \(\beta_i\sim\mathcal{N}(0,\ 0.001^{-1})\) for \(i=1,\dotsb,a\) (the number being inverted is the precision, i.e., the inverse of the variance).
As for the error \(\epsilon_{ik}\) and subject-specific random effect in this case, precisions \(\sigma_\epsilon^{-2},\ g_1^{-1}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005)=\text{Exp}(0.00005)\).
Equivalently, their variances \(\sigma_\epsilon^2,\ g_1\sim\text{Inv-Gamma}(1,\ \color{gray}{\text{scale = }}0.00005)\). Or, \(\sigma_\epsilon^2,\ g_1\sim\text{Scale-inv-}\chi^2(2,\ \color{gray}{h^2=}0.00005)\).
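A quick numerical check of this equivalence at an arbitrary point (x = 2.5 is hypothetical):
x <- 2.5; b <- 5e-5
dgamma(1/x, shape=1, rate=b) / x^2 # Inv-Gamma(1, scale=b) density at x, via change of variables
b/gamma(1) * x^(-1-1) * exp(-b/x) # explicit Inv-Gamma(1, scale=b) density; same value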
Do these priors always work? Probably not.
par(mfrow=c(1,2)); shape <- 1; rate <- 5E-5
curve(dgamma(x, shape=shape, rate=rate), 0.2, 1E5, xlab="Precision (the Inverse of the Variance)",
ylab="Density", main=paste0("Gamma Prior (Shape = ",shape,", Rate = ",rate,")"))
curve(dgamma(1/x, shape=shape, rate=rate) / x^2, 0, 5, xlab="Variance", ylab="Density",
main=paste0("Inverse-Gamma Prior (Shape = ",shape,", Scale = ",rate,")"))
fit1_INLA <- INLA::inla(extra ~ 1 + group + f(ID, model="iid"),
family="gaussian", data=sleep)
# See the default priors for the fixed effects
INLA::inla.set.control.fixed.default(fit1_INLA)[c("mean.intercept", "prec.intercept", "mean", "prec")]
## $mean.intercept
## [1] 0
##
## $prec.intercept
## [1] 0
##
## $mean
## [1] 0
##
## $prec
## [1] 0.001
summary(fit1_INLA)
## Time used:
## Pre = 0.585, Running = 0.203, Post = 0.00998, Total = 0.798
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.75 0.577 -0.394 0.75 1.894 0.75 0
## group2 1.58 0.405 0.772 1.58 2.388 1.58 0
##
## Random effects:
## Name Model
## ID IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.654 0.746 0.618 1.516
## Precision for ID 0.508 0.269 0.172 0.449
## 0.975quant mode
## Precision for the Gaussian observations 3.49 1.264
## Precision for ID 1.20 0.351
##
## Marginal log-Likelihood: -59.42
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
fit0_INLA <- INLA::inla(extra ~ 1 + f(ID, model="iid"),
family="gaussian", data=sleep)
summary(fit0_INLA)
## Time used:
## Pre = 0.523, Running = 0.176, Post = 0.00634, Total = 0.705
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 1.54 0.547 0.451 1.54 2.629 1.54 0
##
## Random effects:
## Name Model
## ID IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.501 0.229 0.157 0.464
## Precision for ID 2.744 5.445 0.312 1.295
## 0.975quant mode
## Precision for the Gaussian observations 1.03 0.385
## Precision for ID 14.58 0.539
##
## Marginal log-Likelihood: -60.40
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
bf10 <- exp(fit1_INLA$mlik - fit0_INLA$mlik)
rownames(bf10) <- c("integration", "Gaussian"); colnames(bf10) <- "BF10.est"
bf10
## BF10.est
## integration 6.850923
## Gaussian 2.661885
The Jeffreys prior for the standard deviation is \(\pi(\sigma)\propto\frac{1}{\sigma}\) or \(\pi(\ln\sigma)\propto1\) in Eq. 6.
# Change the hyperpriors for the precisions of the random effects and error
shape_subj <- rate_subj <- shape_err <- rate_err <- 0.0001
fit1r_INLA <- INLA::inla(extra ~ 1 + group + f(ID, model="iid",
hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
family="gaussian", data=sleep)
summary(fit1r_INLA)
## Time used:
## Pre = 0.528, Running = 0.177, Post = 0.00692, Total = 0.713
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.75 0.656 -0.554 0.75 2.054 0.75 0
## group2 1.58 0.441 0.698 1.58 2.462 1.58 0
##
## Random effects:
## Name Model
## ID IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.396 0.679 0.480 1.263
## Precision for ID 0.403 0.232 0.121 0.349
## 0.975quant mode
## Precision for the Gaussian observations 3.08 1.023
## Precision for ID 1.00 0.262
##
## Marginal log-Likelihood: -57.32
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
fit0r_INLA <- INLA::inla(extra ~ 1 + f(ID, model="iid",
hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
family="gaussian", data=sleep)
summary(fit0r_INLA)
## Time used:
## Pre = 0.544, Running = 0.175, Post = 0.00649, Total = 0.726
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 1.54 0.604 0.336 1.54 2.744 1.54 0
##
## Random effects:
## Name Model
## ID IID model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 0.537 0.246 0.194 0.491
## Precision for ID 0.680 0.602 0.141 0.508
## 0.975quant mode
## Precision for the Gaussian observations 1.14 0.408
## Precision for ID 2.28 0.307
##
## Marginal log-Likelihood: -57.92
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
bf10r <- exp(fit1r_INLA$mlik - fit0r_INLA$mlik)
rownames(bf10r) <- c("integration", "Gaussian"); colnames(bf10r) <- "BF10.est"
bf10r
## BF10.est
## integration 1.538954
## Gaussian 1.827413
Scaling \(\alpha_i=\sigma_\epsilon\cdot t_i\) and \(s_k=\sigma_\epsilon\cdot b_k\).
\[\begin{equation*} \tag{6} \pi(\mu,\sigma_\epsilon^2)\propto\frac{1}{\sigma_\epsilon^{2}} \end{equation*}\]
\[\begin{equation*} \tag{7} t_i^{\star}\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation*}\]
\[\begin{equation*} \tag{8} b_k\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation*}\]
\[\begin{equation*} \tag{9} (t_1^{\star},\dotsb,t_{a-1}^{\star})=(t_1,\dotsb,t_{a})\cdot\mathbf{Q} \end{equation*}\]
\[\begin{equation*} \tag{10} \mathbf{I}_a-a^{-1}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q^{\top}}, \end{equation*}\]
where \(\mathbf{Q}\) is an \(a\times (a-1)\) matrix of the \(a-1\) eigenvectors of unit length corresponding to the nonzero eigenvalues of the left-hand side of Eq. 10. For example, \(t^{\star}=(t_1-t_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(t_1+t_2=0\)), \(t_1=t^{\star}/\sqrt{2}\) and \(t_2=-t^{\star}/\sqrt{2}\).
\[\begin{equation*} \tag{11} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation*}\]
\[\begin{equation*} \tag{12} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation*}\]
By default, \(h=0.5\) for the fixed effects and \(h_b=1\) for the random effects.
Stan Version 2.32.2
computeQ <- function(C) {
#' reduced parametrization for the fixed treatment effects
S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
e <- qr(S)
Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
dat <- cbind(sleep$extra[1:10], sleep$extra[11:20]) #wide format
Specify the full model \(\mathcal{M}_1\) through Eq. 1 & 6-12, the same hierarchical specification as in the Bayesian RMANOVA above.
stancodeM1 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] Q; // projecting C fixed effects into C-1
real<lower=0> ht; // (square root) prior scale on the variability of the standardized treatment effects
real<lower=0> hb; // (square root) prior scale on the variability of the standardized subject-specific random effects
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real mu; // grand mean
real<lower=0> sigma; // standard deviation of the error
real<lower=0> gt; // variance of the standardized projected fixed treatment effects
real<lower=0> gb; // variance of the standardized subject-specific random effects
vector[C-1] tf; // projected fixed treatment effects
vector[N] b; // subject-specific random effects
}
transformed parameters {
real<lower=0> eta; // sd of the projected fixed treatment effects
real<lower=0> tau; // sd of the subject-specific random effects
vector[C] t; // fixed treatment effects
eta = sigma * sqrt(gt);
tau = sigma * sqrt(gb);
t = Q * tf;
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += -log(sigma); // Jeffreys prior
target += scaled_inv_chi_square_lpdf(gt | 1, ht);
target += scaled_inv_chi_square_lpdf(gb | 1, hb); // Rouder et al. (2012)
target += normal_lpdf(tf | 0, eta);
target += normal_lpdf(b | 0, tau);
for (i in 1:N) {
target += normal_lpdf(Y[i] | mu + t + b[i], sigma);
}
}
"
stanmodelM1 <- stan_model(model_code=stancodeM1)
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"Q"=computeQ(ncol(dat)), "ht"=.5, "hb"=1)
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]])
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 1.5429 | 0.0029 | 0.5998 | 0.3453 | 1.1668 | 1.5436 | 1.9202 | 2.7306 | 42632.96 | 1.0000 |
| sigma | 1.0525 | 0.0012 | 0.2753 | 0.6474 | 0.8549 | 1.0066 | 1.2030 | 1.7107 | 50887.75 | 1.0001 |
| gt | 24.5569 | 13.7133 | 3136.2398 | 0.0899 | 0.3682 | 0.8558 | 2.2434 | 27.8077 | 52303.94 | 1.0001 |
| gb | 3.4158 | 0.0125 | 3.1561 | 0.4290 | 1.4501 | 2.5554 | 4.3446 | 11.4854 | 63610.94 | 1.0000 |
| tf[1] | -0.9503 | 0.0009 | 0.3489 | -1.6196 | -1.1766 | -0.9592 | -0.7303 | -0.2346 | 143712.64 | 1.0000 |
| b[1] | -0.1964 | 0.0031 | 0.8369 | -1.8620 | -0.7365 | -0.1941 | 0.3461 | 1.4624 | 74187.22 | 1.0000 |
| b[2] | -1.5612 | 0.0033 | 0.8774 | -3.3033 | -2.1335 | -1.5616 | -0.9895 | 0.1741 | 70424.57 | 1.0000 |
| b[3] | -0.8774 | 0.0031 | 0.8452 | -2.5593 | -1.4226 | -0.8771 | -0.3269 | 0.7867 | 73447.63 | 1.0000 |
| b[4] | -1.6794 | 0.0033 | 0.8831 | -3.4211 | -2.2592 | -1.6793 | -1.1015 | 0.0727 | 70409.05 | 1.0001 |
| b[5] | -1.3187 | 0.0032 | 0.8633 | -3.0322 | -1.8805 | -1.3182 | -0.7529 | 0.3819 | 71953.32 | 1.0000 |
| b[6] | 1.8934 | 0.0034 | 0.8893 | 0.1398 | 1.3112 | 1.8952 | 2.4715 | 3.6574 | 69446.08 | 1.0000 |
| b[7] | 2.4534 | 0.0036 | 0.9275 | 0.6048 | 1.8519 | 2.4595 | 3.0588 | 4.2752 | 67362.03 | 1.0000 |
| b[8] | -0.2765 | 0.0031 | 0.8379 | -1.9454 | -0.8190 | -0.2769 | 0.2670 | 1.3893 | 73105.46 | 1.0000 |
| b[9] | 0.6095 | 0.0031 | 0.8422 | -1.0498 | 0.0617 | 0.6047 | 1.1569 | 2.2874 | 74140.44 | 1.0000 |
| b[10] | 0.9273 | 0.0032 | 0.8500 | -0.7428 | 0.3740 | 0.9209 | 1.4768 | 2.6212 | 72503.73 | 1.0000 |
| eta | 1.4274 | 0.0183 | 4.1534 | 0.3372 | 0.6318 | 0.9309 | 1.4800 | 5.1616 | 51452.88 | 1.0002 |
| tau | 1.6699 | 0.0017 | 0.5069 | 0.9025 | 1.3220 | 1.5939 | 1.9307 | 2.8713 | 85869.62 | 1.0000 |
| t[1] | -0.6720 | 0.0007 | 0.2467 | -1.1452 | -0.8320 | -0.6782 | -0.5164 | -0.1659 | 143712.64 | 1.0000 |
| t[2] | 0.6720 | 0.0007 | 0.2467 | 0.1659 | 0.5164 | 0.6782 | 0.8320 | 1.1452 | 143712.64 | 1.0000 |
| lp__ | -55.8424 | 0.0182 | 3.6592 | -64.1114 | -58.0465 | -55.4330 | -53.2032 | -49.8715 | 40396.39 | 1.0001 |
Note that to compute the (log) marginal likelihood for a Stan model, we need to specify the model in a certain way. Instead of using "~" signs for specifying distributions, we need to directly use the (log) density functions. The reason for this is that when using the "~" sign, constant terms are dropped which are not needed for sampling from the posterior. However, for computing the marginal likelihood, these constants need to be retained. For instance, instead of writing y ~ normal(mu, sigma); we would need to write target += normal_lpdf(y | mu, sigma); (Gronau, 2021).
See also the discussion regarding the implicit uniform prior for computing the log marginal likelihood.
stancodeM1a <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] Q; // projecting C fixed effects into C-1
real<lower=0> ht; // (square root) prior scale on the variability of the standardized treatment effects
real<lower=0> hb; // (square root) prior scale on the variability of the standardized subject-specific random effects
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real mu; // grand mean
real<lower=0> sigma; // standard deviation of the error
real<lower=0> gt; // variance of the standardized projected fixed treatment effects
real<lower=0> gb; // variance of the standardized subject-specific random effects
vector[C-1] tf; // projected fixed treatment effects
vector[N] b; // subject-specific random effects
}
transformed parameters {
real<lower=0> eta; // sd of the projected fixed treatment effects
real<lower=0> tau; // sd of the subject-specific random effects
vector[C] t; // fixed treatment effects
eta = sigma * sqrt(gt);
tau = sigma * sqrt(gb);
t = Q * tf;
}
model { // traditional style
// linear mixed-effects model
for (i in 1:N) {
Y[i] ~ normal(mu + t + b[i], sigma);
}
tf ~ normal(0, eta);
b ~ normal(0, tau);
// priors
// mu ~ implicit uniform prior
mu ~ uniform(lwr, upr);
target += -log(sigma); // Jeffreys prior
gt ~ scaled_inv_chi_square(1, ht);
gb ~ scaled_inv_chi_square(1, hb); // Rouder et al. (2012)
}
"
stanmodelM1a <- stan_model(model_code=stancodeM1a)
stanfitM1a <- sampling(stanmodelM1a, data=datalistM1,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM1a)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 1.5425 | 0.0030 | 0.6085 | 0.3306 | 1.1642 | 1.5421 | 1.9230 | 2.7477 | 40568.46 | 1.0000 |
| sigma | 1.0510 | 0.0012 | 0.2735 | 0.6488 | 0.8537 | 1.0062 | 1.1997 | 1.7022 | 48980.81 | 1.0001 |
| gt | 10.5234 | 1.6789 | 457.2382 | 0.0887 | 0.3692 | 0.8589 | 2.2655 | 28.0892 | 74169.06 | 1.0000 |
| gb | 3.4219 | 0.0126 | 3.1326 | 0.4317 | 1.4573 | 2.5730 | 4.3529 | 11.4506 | 62129.53 | 1.0001 |
| tf[1] | -0.9515 | 0.0009 | 0.3504 | -1.6268 | -1.1786 | -0.9622 | -0.7309 | -0.2346 | 137333.73 | 1.0000 |
| b[1] | -0.1938 | 0.0032 | 0.8439 | -1.8716 | -0.7372 | -0.1933 | 0.3493 | 1.4684 | 69790.04 | 1.0000 |
| b[2] | -1.5616 | 0.0034 | 0.8770 | -3.3097 | -2.1303 | -1.5608 | -0.9883 | 0.1607 | 66934.72 | 1.0000 |
| b[3] | -0.8795 | 0.0032 | 0.8502 | -2.5822 | -1.4278 | -0.8775 | -0.3284 | 0.7960 | 68916.69 | 1.0000 |
| b[4] | -1.6831 | 0.0034 | 0.8860 | -3.4397 | -2.2572 | -1.6840 | -1.1013 | 0.0580 | 67867.92 | 1.0000 |
| b[5] | -1.3218 | 0.0033 | 0.8700 | -3.0498 | -1.8833 | -1.3175 | -0.7562 | 0.3815 | 67512.84 | 1.0000 |
| b[6] | 1.8940 | 0.0035 | 0.8957 | 0.1396 | 1.3082 | 1.8942 | 2.4770 | 3.6672 | 66563.59 | 1.0000 |
| b[7] | 2.4578 | 0.0037 | 0.9285 | 0.6017 | 1.8527 | 2.4654 | 3.0631 | 4.2832 | 64594.57 | 1.0000 |
| b[8] | -0.2726 | 0.0032 | 0.8455 | -1.9510 | -0.8162 | -0.2706 | 0.2728 | 1.4003 | 70731.43 | 1.0000 |
| b[9] | 0.6079 | 0.0032 | 0.8485 | -1.0543 | 0.0537 | 0.6030 | 1.1553 | 2.3051 | 69301.96 | 1.0000 |
| b[10] | 0.9298 | 0.0033 | 0.8544 | -0.7381 | 0.3733 | 0.9254 | 1.4788 | 2.6332 | 68189.18 | 1.0000 |
| eta | 1.4141 | 0.0122 | 2.9094 | 0.3372 | 0.6319 | 0.9313 | 1.4834 | 5.2005 | 57081.22 | 1.0001 |
| tau | 1.6726 | 0.0018 | 0.5117 | 0.9053 | 1.3218 | 1.5953 | 1.9340 | 2.8951 | 84443.88 | 1.0000 |
| t[1] | -0.6728 | 0.0007 | 0.2478 | -1.1503 | -0.8334 | -0.6804 | -0.5168 | -0.1659 | 137333.73 | 1.0000 |
| t[2] | 0.6728 | 0.0007 | 0.2478 | 0.1659 | 0.5168 | 0.6804 | 0.8334 | 1.1503 | 137333.73 | 1.0000 |
| lp__ | -21.6296 | 0.0188 | 3.6733 | -29.9007 | -23.8201 | -21.2217 | -18.9824 | -15.6813 | 38269.72 | 1.0001 |
N <- datalistM1$N; C <- datalistM1$C
ht <- datalistM1$ht; hb <- datalistM1$hb; upr <- datalistM1$upr; lwr <- datalistM1$lwr
const <- log(2*pi) * (-N*C - N - C - 1) / 2 + log(ht) + log(hb) - log(upr-lwr)
head(extract(stanfitM1)$lp__)
## [1] -52.47735 -56.44438 -57.33944 -53.01560 -61.67469 -60.59133
head(extract(stanfitM1a)$lp__) + const
## [1] -53.33232 -53.78627 -57.50837 -50.76387 -52.50371 -55.92028
In this case, lp__ is the log posterior density ("up to a constant"), where the dropped constant is \(\underset{\color{lightgray}{\text{Eq. 1}}}{N\times \ln\left(\frac{1}{2\pi}\right)^{\frac{C}{2}}}+\underset{\color{lightgray}{\text{Eq. 8}}}{\ln\left(\frac{1}{2\pi}\right)^{\frac{N}{2}}}+\underset{\color{lightgray}{\text{Eq. 7}}}{\ln\left(\frac{1}{2\pi}\right)^{\frac{C-1}{2}}}+\underset{\color{lightgray}{\text{Eq. 11}}}{\ln\left(\frac{h_t}{\sqrt{2\pi}}\right)}+\underset{\color{lightgray}{\text{Eq. 12}}}{\ln\left(\frac{h_b}{\sqrt{2\pi}}\right)}+\underset{\color{lightgray}{\text{Eq. 6}}}{\ln\left(\frac{1}{\mathrm{upr}-\mathrm{lwr}}\right)}\), i.e., the const defined above. \(\qquad\)(Help us verify!)
lp__ was described as "log density up to a constant" in the manual. Here, the log density should be the log likelihood of the observations conditioned on the posterior parameters: p(y | p_post). In the MCMC sampling paradigm, this could be obtained by plugging the parameter samples from each iteration into the likelihood and computing the probability of each observation. Therefore, "lp" is actually a vector whose length equals the number of observations. lp__ in Stan's output would be the sum of the "lp" vector, which essentially quantifies how well the model matches the data, and is thereby useful for model comparison purposes (Wang, 2015).
Specify the null model \(\mathcal{M}_0\) in Eq. 1. Compile and fit the model in Stan.
stancodeM0 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
real<lower=0> hb; // (square root) prior scale on the variability of the standardized subject-specific random effects
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real mu; // grand mean
real<lower=0> sigma; // standard deviation of the error
real<lower=0> gb; // variance of the standardized subject-specific random effects
vector[N] b; // subject-specific random effects
}
transformed parameters {
real<lower=0> tau; // sd of the subject-specific random effects
tau = sigma * sqrt(gb);
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += -log(sigma); // Jeffreys prior
target += scaled_inv_chi_square_lpdf(gb | 1, hb); // Rouder et al. (2012)
target += normal_lpdf(b | 0, tau);
for (i in 1:N) {
target += normal_lpdf(Y[i] | mu + b[i], sigma);
}
}
"
stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 1.5414 | 0.0024 | 0.6224 | 0.3019 | 1.1472 | 1.5431 | 1.9359 | 2.7779 | 64635.16 | 1.0000 |
| sigma | 1.4830 | 0.0012 | 0.3114 | 0.9919 | 1.2605 | 1.4433 | 1.6617 | 2.2010 | 72162.95 | 1.0000 |
| gb | 1.4355 | 0.0049 | 1.3088 | 0.2189 | 0.6184 | 1.0695 | 1.8057 | 4.8387 | 72396.89 | 1.0000 |
| b[1] | -0.1609 | 0.0028 | 0.9592 | -2.0748 | -0.7771 | -0.1572 | 0.4539 | 1.7468 | 113422.05 | 1.0000 |
| b[2] | -1.2800 | 0.0032 | 1.0026 | -3.3155 | -1.9266 | -1.2598 | -0.6111 | 0.6317 | 100836.77 | 1.0000 |
| b[3] | -0.7165 | 0.0029 | 0.9703 | -2.6734 | -1.3415 | -0.7006 | -0.0808 | 1.1752 | 109457.93 | 1.0000 |
| b[4] | -1.3781 | 0.0032 | 1.0090 | -3.4260 | -2.0344 | -1.3522 | -0.7026 | 0.5456 | 99791.82 | 1.0000 |
| b[5] | -1.0810 | 0.0031 | 0.9859 | -3.0828 | -1.7156 | -1.0566 | -0.4245 | 0.8043 | 102879.75 | 1.0000 |
| b[6] | 1.5531 | 0.0033 | 1.0281 | -0.4008 | 0.8636 | 1.5269 | 2.2189 | 3.6531 | 94992.19 | 1.0000 |
| b[7] | 2.0126 | 0.0037 | 1.0767 | -0.0327 | 1.2836 | 1.9898 | 2.7191 | 4.1902 | 86669.79 | 1.0000 |
| b[8] | -0.2273 | 0.0028 | 0.9524 | -2.1292 | -0.8422 | -0.2234 | 0.3898 | 1.6593 | 112544.69 | 1.0000 |
| b[9] | 0.4972 | 0.0029 | 0.9626 | -1.3850 | -0.1275 | 0.4892 | 1.1107 | 2.4334 | 110967.91 | 1.0000 |
| b[10] | 0.7658 | 0.0030 | 0.9681 | -1.1104 | 0.1289 | 0.7482 | 1.3878 | 2.7314 | 104322.02 | 1.0000 |
| tau | 1.5707 | 0.0019 | 0.5259 | 0.7888 | 1.2025 | 1.4903 | 1.8426 | 2.8294 | 77074.27 | 1.0000 |
| lp__ | -58.8016 | 0.0143 | 3.1595 | -66.1468 | -60.6284 | -58.3858 | -56.5122 | -53.8815 | 48980.42 | 1.0001 |
Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -42.48485
##
## Error Measures:
##
## Relative Mean-Squared Error: 5.182254e-06
## Coefficient of Variation: 0.002276456
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -44.94092
##
## Error Measures:
##
## Relative Mean-Squared Error: 2.883494e-06
## Coefficient of Variation: 0.001698085
## Percentage Error: 0%
##
## Note:
## All error measures are approximate.
bf(M1, M0)
## Estimated Bayes factor in favor of M1 over M0: 11.65892
\(\mu\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}+\infty)\)
\(\alpha_i\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}0.001^{-1})\)
\(s_k\mid g_1\sim\mathcal{N}(0,\ \color{gray}{\text{variance = }}g_1)\)
\(g_1^{-1}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005)\)
\(\sigma_\epsilon^{-2}\sim\text{Gamma}(1,\ \color{gray}{\text{rate = }}0.00005)\)
Stan Version 2.32.2
Specify the full model \(\mathcal{M}_1\) through Eq. 1 and the gamma priors used in R-INLA.
inlacodeM1 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
real<lower=0> shape; // shape parameter of the inverse-gamma hyperprior
real<lower=0> scale; // scale parameter of the inverse-gamma hyperprior
}
parameters {
real mu; // grand mean
real<lower=0> sigma2; // variance of the error
real<lower=0> gb; // variance of the subject-specific random effects
vector[C] t; // treatment effects
vector[N] b; // subject-specific random effects
}
model {
target += normal_lpdf(mu | 0, sqrt(1000));
target += normal_lpdf(t | 0, sqrt(1000));
target += normal_lpdf(b | 0, sqrt(gb));
// X ~ Gamma(a, rate = b) <==> 1/X ~ Inv-Gamma(a, scale = b)
target += inv_gamma_lpdf(gb | shape, scale);
target += inv_gamma_lpdf(sigma2 | shape, scale);
for (i in 1:N) {
target += normal_lpdf(Y[i] | mu + t + b[i], sqrt(sigma2));
}
}
"
inlamodelM1 <- stan_model(model_code=inlacodeM1)
dat <- cbind(sleep$extra[1:10], sleep$extra[11:20]) #wide format
datalist <- list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat, "shape"=1, "scale"=0.00005)
inlafitM1 <- sampling(inlamodelM1, data=datalist,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0,
control=list(adapt_delta=0.95, max_treedepth=15))
## Warning: There were 2609 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
rstan::summary(inlafitM1)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 1.1474 | 0.1110 | 18.2358 | -34.3937 | -11.2192 | 1.0240 | 13.4130 | 36.9119 | 26982.2266 | 1.0001 |
| sigma2 | 3.5090 | 0.0315 | 1.3509 | 1.0311 | 2.6481 | 3.3077 | 4.1550 | 6.7545 | 1839.8623 | 1.0008 |
| gb | 0.0928 | 0.0336 | 0.5787 | 0.0000 | 0.0000 | 0.0001 | 0.0002 | 1.6543 | 296.7500 | 1.0062 |
| t[1] | -0.3963 | 0.1110 | 18.2385 | -36.1635 | -12.6427 | -0.2727 | 11.9813 | 35.1630 | 27015.0873 | 1.0001 |
| t[2] | 1.1831 | 0.1109 | 18.2368 | -34.6055 | -11.0509 | 1.2936 | 13.5693 | 36.7062 | 27051.8033 | 1.0001 |
| b[1] | -0.0090 | 0.0045 | 0.1637 | -0.0785 | -0.0062 | -0.0001 | 0.0059 | 0.0482 | 1305.0008 | 1.0026 |
| b[2] | -0.0569 | 0.0205 | 0.3300 | -1.1489 | -0.0069 | -0.0005 | 0.0054 | 0.0297 | 259.6935 | 1.0073 |
| b[3] | -0.0352 | 0.0126 | 0.2381 | -0.5221 | -0.0067 | -0.0004 | 0.0055 | 0.0327 | 355.3115 | 1.0065 |
| b[4] | -0.0659 | 0.0248 | 0.3752 | -1.3549 | -0.0068 | -0.0005 | 0.0053 | 0.0295 | 229.4127 | 1.0092 |
| b[5] | -0.0508 | 0.0187 | 0.3014 | -1.0024 | -0.0068 | -0.0005 | 0.0054 | 0.0306 | 259.9161 | 1.0079 |
| b[6] | 0.0648 | 0.0227 | 0.3623 | -0.0296 | -0.0053 | 0.0005 | 0.0069 | 1.3301 | 254.1309 | 1.0061 |
| b[7] | 0.0885 | 0.0322 | 0.4784 | -0.0285 | -0.0053 | 0.0006 | 0.0070 | 2.0714 | 220.5887 | 1.0081 |
| b[8] | -0.0108 | 0.0037 | 0.1526 | -0.1178 | -0.0063 | -0.0001 | 0.0058 | 0.0432 | 1710.2283 | 1.0014 |
| b[9] | 0.0197 | 0.0067 | 0.1781 | -0.0365 | -0.0057 | 0.0003 | 0.0065 | 0.1859 | 716.7570 | 1.0020 |
| b[10] | 0.0326 | 0.0116 | 0.2247 | -0.0332 | -0.0056 | 0.0003 | 0.0067 | 0.4447 | 375.7006 | 1.0043 |
| lp__ | -37.2466 | 0.6244 | 11.2405 | -78.0931 | -40.1797 | -34.4823 | -30.4165 | -25.2075 | 324.1291 | 1.0058 |
Specify the null model \(\mathcal{M}_0\) in Eq. 1. Compile and fit the model in Stan.
inlacodeM0 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
real<lower=0> shape; // shape parameter of the inverse-gamma hyperprior
real<lower=0> scale; // scale parameter of the inverse-gamma hyperprior
}
parameters {
real mu; // grand mean
real<lower=0> sigma2; // variance of the error
real<lower=0> gb; // variance of the subject-specific random effects
vector[N] b; // subject-specific random effects
}
model {
target += normal_lpdf(mu | 0, sqrt(1000));
target += normal_lpdf(b | 0, sqrt(gb));
// X ~ Gamma(a, rate = b) <==> 1/X ~ Inv-Gamma(a, scale = b)
target += inv_gamma_lpdf(gb | shape, scale);
target += inv_gamma_lpdf(sigma2 | shape, scale);
for (i in 1:N) {
target += normal_lpdf(Y[i] | mu + b[i], sqrt(sigma2));
}
}
"
inlamodelM0 <- stan_model(model_code=inlacodeM0)
inlafitM0 <- sampling(inlamodelM0, data=datalist,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0,
control=list(adapt_delta=0.95, max_treedepth=15))
rstan::summary(inlafitM0)[[1]] %>% kable(digits=4) %>% kable_classic(full_width=F)
| | mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat |
|---|---|---|---|---|---|---|---|---|---|---|
| mu | 1.5367 | 0.0020 | 0.4525 | 0.6418 | 1.2413 | 1.5377 | 1.8322 | 2.4324 | 52340.535 | 1.0001 |
| sigma2 | 4.0785 | 0.0063 | 1.4036 | 2.1730 | 3.1031 | 3.8099 | 4.7467 | 7.5480 | 50270.297 | 1.0001 |
| gb | 0.0010 | 0.0004 | 0.0385 | 0.0000 | 0.0000 | 0.0001 | 0.0002 | 0.0019 | 8260.090 | 1.0004 |
| b[1] | 0.0000 | 0.0001 | 0.0249 | -0.0305 | -0.0058 | 0.0000 | 0.0058 | 0.0303 | 128180.810 | 1.0000 |
| b[2] | -0.0007 | 0.0003 | 0.0317 | -0.0316 | -0.0059 | -0.0001 | 0.0057 | 0.0290 | 15366.105 | 1.0002 |
| b[3] | -0.0004 | 0.0002 | 0.0240 | -0.0308 | -0.0058 | 0.0000 | 0.0057 | 0.0296 | 25183.615 | 1.0001 |
| b[4] | -0.0009 | 0.0003 | 0.0326 | -0.0314 | -0.0060 | -0.0001 | 0.0056 | 0.0286 | 13732.789 | 1.0002 |
| b[5] | -0.0005 | 0.0002 | 0.0280 | -0.0309 | -0.0060 | -0.0001 | 0.0057 | 0.0297 | 20845.194 | 1.0001 |
| b[6] | 0.0010 | 0.0004 | 0.0377 | -0.0289 | -0.0056 | 0.0001 | 0.0060 | 0.0314 | 10404.059 | 1.0003 |
| b[7] | 0.0012 | 0.0005 | 0.0425 | -0.0290 | -0.0056 | 0.0001 | 0.0060 | 0.0327 | 8644.644 | 1.0004 |
| b[8] | 0.0000 | 0.0001 | 0.0252 | -0.0305 | -0.0058 | 0.0000 | 0.0058 | 0.0304 | 190492.179 | 1.0000 |
| b[9] | 0.0004 | 0.0002 | 0.0279 | -0.0299 | -0.0057 | 0.0001 | 0.0059 | 0.0311 | 31671.672 | 1.0001 |
| b[10] | 0.0006 | 0.0002 | 0.0288 | -0.0298 | -0.0056 | 0.0001 | 0.0059 | 0.0313 | 22085.791 | 1.0001 |
| lp__ | -27.6867 | 0.0841 | 7.3340 | -45.5047 | -31.4658 | -26.3063 | -22.4387 | -17.4948 | 7602.004 | 1.0007 |
Compute log marginal likelihoods for full and null models via bridge sampling. Then, \(\textit{BF}_{10}=\frac{p(\text{Data}\ \mid\ \mathcal{M}_1)}{p(\text{Data}\ \mid\ \mathcal{M}_0)}\).
inla.M1 <- bridge_sampler(inlafitM1, silent=T)
summary(inla.M1)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -59.85118
##
## Error Measures:
##
## Relative Mean-Squared Error: 0.0330446
## Coefficient of Variation: 0.1817817
## Percentage Error: 18%
##
## Note:
## All error measures are approximate.
inla.M0 <- bridge_sampler(inlafitM0, silent=T)
summary(inla.M0)
##
## Bridge sampling log marginal likelihood estimate
## (method = "normal", repetitions = 1):
##
## -57.69477
##
## Error Measures:
##
## Relative Mean-Squared Error: 0.0001017732
## Coefficient of Variation: 0.01008827
## Percentage Error: 1%
##
## Note:
## All error measures are approximate.
bf(inla.M1, inla.M0)
## Estimated Bayes factor in favor of inla.M1 over inla.M0: 0.11574
library(ez) #4.4-0
library(lme4) #1.1-35.3
library(nlme) #3.1-164
library(BayesFactor) #0.9.12-4.7
if(!require(INLA)) { #https://www.r-inla.org/download-install
install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"),dep=T)
library(INLA) #24.05.10 (R version 4.4.0)
}
if(!require(rstan)) {install.packages("rstan"); library(rstan)} #2.32.6, macOS 13.6.7 M2
#do not use "bridgesampling" CRAN version 1.1-2 or lower
if(!require(devtools)) {install.packages("devtools")}
if(!require(bridgesampling)) {
devtools::install_github("quentingronau/bridgesampling@master")
library(bridgesampling) #1.1-5
}
## Paired or One-Sample t-Test.
group1 <- sleep$extra[1:10]; group2 <- sleep$extra[11:20]; resp_diff <- group1 - group2; N <- length(resp_diff)
t.test(resp_diff)
(fit_t <- t.test(group1, group2, paired=T))
ttest.tstat(t=fit_t$statistic, n1=N) # convert t-statistic to ln(BF10)
ttest.tstat(t=fit_t$statistic, n1=N, simple=T) # convert t-statistic to BF10
shapiro.test(resp_diff) # check the normality assumption
wilcox.test(resp_diff, mu=0, alternative="two.sided")
n_perm <- 2000 # number of permutations
mean_diff <- numeric(n_perm)
for (i in 1:n_perm) {
# randomly switch the group labels and then compute the mean difference
set.seed(i)
mean_diff[i] <- mean(resp_diff * sample(c(-1,1), N, replace=T))
}
hist(mean_diff, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
main=paste0(n_perm," Permutations of Raw Data"))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff), col="red", lwd=2)
sum(abs(mean_diff) >= abs(mean(resp_diff))) / n_perm # estimated p-value
mean_diff_exact <- colMeans(apply(expand.grid(rep(list(c(-1, 1)), N)),
1, function(signs) resp_diff * signs))
hist(mean_diff_exact, prob=T, col="white", yaxt="n", ylab="", xlab="Mean Differences (hour)",
main=substitute(2^exp * " Unique Permutations of Raw Data", list(exp=N)))
abline(v=mean(resp_diff), col="blue", lwd=3, lty=2)
lines(density(mean_diff_exact), col="red", lwd=2)
sum(abs(mean_diff_exact) >= abs(mean(resp_diff))) / (2^N) # exact p-value
## Repeated-Measures Analysis of Variance (RMANOVA)
summary(aov(extra ~ group + Error(ID/group), sleep)) # one-way RMANOVA
summary(aov(extra ~ group + ID, sleep)) # two-factor ANOVA
ezANOVA(sleep, dv=extra, wid=ID, within=.(group), type=2, detailed=T, return_aov=T)
?oneWayAOV.Fstat
friedman.test(extra ~ group | ID, sleep)
## Linear Mixed-Effects Regression (LMER)
summary(lmer(extra ~ group + (1 | ID), sleep, REML=F)) # random intercept for each subject
summary(lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML"))
fit1_LME <- lmer(extra ~ group + (1 | ID), sleep, REML=F)
fit0_LME <- lmer(extra ~ (1 | ID), sleep, REML=F)
exp((BIC(fit0_LME)-BIC(fit1_LME))/2) # Approx. BF10
getME(fit1_LME, "Z") # random-effects model matrix Z
kronecker(rep(1,2), diag(N)) # random-effects model matrix Z
model.matrix(fit1_LME) # design matrix for dummy (one-hot) coding
kronecker(diag(2), rep(1,N)) # design matrix for contrast (sum) coding
fit1_LMEr <- lme(fixed=extra ~ group, random=~1 | ID, sleep, method="ML")
fit0_LMEr <- lme(fixed=extra ~ 1, random=~1 | ID, sleep, method="ML")
exp((BIC(fit0_LMEr)-BIC(fit1_LMEr))/2) # Approx. BF10
## Bayesian t-Test
ttestBF(resp_diff)
## Bayesian RMANOVA
set.seed(277)
(fit_anovaBF <- anovaBF(extra ~ group + ID, data=sleep, whichRandom="ID", progress=F))
set.seed(277)
chains <- posterior(fit_anovaBF, iterations=1e5, progress=F) # sample from the posterior distribution
summary(chains)
model.matrix(fit_anovaBF) # design matrices X[,-1]·Q and Z
## Integrated Nested Laplace Approximation (INLA)
fit1_INLA <- inla(extra ~ 1 + group + f(ID, model="iid"),
family="gaussian", data=sleep)
inla.set.control.fixed.default(fit1_INLA)[c("mean.intercept", "prec.intercept", "mean", "prec")] #default priors
summary(fit1_INLA)
fit0_INLA <- inla(extra ~ 1 + f(ID, model="iid"),
family="gaussian", data=sleep)
summary(fit0_INLA)
bf10 <- exp(fit1_INLA$mlik - fit0_INLA$mlik)
rownames(bf10) <- c("integration", "Gaussian"); colnames(bf10) <- "BF10.est"
bf10
## Maximal Set of Random Effects (MRE) Model
# formula <- extra ~ 1 + group + f(ID, model="iid") + f(ID:group, model="iid")
# Change the hyperpriors for the precisions of the random effects and error
shape_subj <- rate_subj <- shape_err <- rate_err <- 0.0001
fit1r_INLA <- inla(extra ~ 1 + group + f(ID, model="iid",
hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
family="gaussian", data=sleep)
summary(fit1r_INLA)
fit0r_INLA <- inla(extra ~ 1 + f(ID, model="iid",
hyper=list(prec=list(param=c(shape_subj, rate_subj)))),
control.family=list(hyper=list(prec=list(param=c(shape_err, rate_err)))),
family="gaussian", data=sleep)
summary(fit0r_INLA)
bf10r <- exp(fit1r_INLA$mlik - fit0r_INLA$mlik)
rownames(bf10r) <- c("integration", "Gaussian"); colnames(bf10r) <- "BF10.est"
bf10r
## Stan
computeQ <- function(C) {
#' reduced parametrization for the fixed treatment effects
S <- diag(C) - matrix(1, nrow = C, ncol = C) / C
e <- qr(S)
Q <- qr.Q(e) %*% diag(sign(diag(qr.R(e))))
Q[, which(abs(e$qraux) > 1e-3), drop = FALSE]
}
dat <- cbind(group1, group2) #wide format
stancodeM1 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
matrix[C,C-1] Q; // projecting C fixed effects into C-1
real<lower=0> ht; // (square root) prior scale on the variability of the standardized treatment effects
real<lower=0> hb; // (square root) prior scale on the variability of the standardized subject-specific random effects
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real mu; // grand mean
real<lower=0> sigma; // standard deviation of the error
real<lower=0> gt; // variance of the standardized projected fixed treatment effects
real<lower=0> gb; // variance of the standardized subject-specific random effects
vector[C-1] tf; // projected fixed treatment effects
vector[N] b; // subject-specific random effects
}
transformed parameters {
real<lower=0> eta; // sd of the projected fixed treatment effects
real<lower=0> tau; // sd of the subject-specific random effects
vector[C] t; // fixed treatment effects
eta = sigma * sqrt(gt);
tau = sigma * sqrt(gb);
t = Q * tf;
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += -log(sigma); // Jeffreys prior
target += scaled_inv_chi_square_lpdf(gt | 1, ht);
target += scaled_inv_chi_square_lpdf(gb | 1, hb); // Rouder et al. (2012)
target += normal_lpdf(tf | 0, eta);
target += normal_lpdf(b | 0, tau);
for (i in 1:N) {
target += normal_lpdf(Y[i] | mu + t + b[i], sigma);
}
}
"
stanmodelM1 <- stan_model(model_code=stancodeM1)
datalistM1 <- list("N"=nrow(dat), "C"=ncol(dat), "Y"=dat,
"Q"=computeQ(ncol(dat)), "ht"=.5, "hb"=1)
datalistM1$lwr <- mean(datalistM1[["Y"]])-6*sd(datalistM1[["Y"]])
datalistM1$upr <- mean(datalistM1[["Y"]])+6*sd(datalistM1[["Y"]])
stanfitM1 <- sampling(stanmodelM1, data=datalistM1,
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM1)[[1]]
stancodeM0 <- "
data {
int<lower=1> N; // number of subjects
int<lower=2> C; // number of conditions
// vector[C] Y[N]; // responses [removed features]
array[N] vector[C] Y; // responses [Stan 2.26+ syntax for array declarations]
real<lower=0> hb; // (square root) prior scale on the variability of the standardized subject-specific random effects
real lwr; // lower bound for the grand mean
real upr; // upper bound for the grand mean
}
parameters {
real mu; // grand mean
real<lower=0> sigma; // standard deviation of the error
real<lower=0> gb; // variance of the standardized subject-specific random effects
vector[N] b; // subject-specific random effects
}
transformed parameters {
real<lower=0> tau; // sd of the subject-specific random effects
tau = sigma * sqrt(gb);
}
model {
target += uniform_lpdf(mu | lwr, upr);
target += -log(sigma); // Jeffreys prior
target += scaled_inv_chi_square_lpdf(gb | 1, hb); // Rouder et al. (2012)
target += normal_lpdf(b | 0, tau);
for (i in 1:N) {
target += normal_lpdf(Y[i] | mu + b[i], sigma);
}
}
"
stanmodelM0 <- stan_model(model_code=stancodeM0)
stanfitM0 <- sampling(stanmodelM0, data=datalistM1[-c(4,5)],
iter=50000, warmup=10000, chains=4, seed=277, refresh=0)
rstan::summary(stanfitM0)[[1]]
M1 <- bridge_sampler(stanfitM1, silent=T)
summary(M1)
M0 <- bridge_sampler(stanfitM0, silent=T)
summary(M0)
bf(M1, M0)