============================================================================================================
PREVIOUS: weakly informative prior for covariance matrices, https://rpubs.com/sherloconan/1081051
PREVIOUS: inverse-Wishart prior for the error covariance matrix, https://rpubs.com/sherloconan/1085223
THIS: GLS and BIC approximation, https://rpubs.com/sherloconan/1093391
In Loftus and Masson (1994), to-be-recalled 20-word lists are presented at a rate of 1, 2, or 5 seconds per word. Suppose that the hypothetical experiment is run as a one-way within-subject (repeated-measures) design with three conditions.
dat <- matrix(c(10,6,11,22,16,15,1,12,9,8,
                13,8,14,23,18,17,1,15,12,9,
                13,8,14,25,20,17,4,17,12,12), ncol=3,
              dimnames=list(paste0("s",1:10), paste0("Level",1:3))) #wide data format
# dat <- dat * 100
df <- data.frame("ID"=factor(rep(paste0("s",1:10), 3)),
                 "Level"=factor(rep(paste0("Level",1:3), each=10)),
                 "Response"=c(dat)) #long data format
Alternatively, test a simulated data set.
mu.b <- 100; sigma.b <- 20; n <- 24 #true-score mean and SD; number of subjects
delta.mu <- 6.7; sigma.e <- 6.67 #condition effect and error SD (rho=.9, power=.8)
set.seed(277)
TrueScore <- rnorm(n, mu.b, sigma.b) #subject true scores
eij1 <- rnorm(n, 0, sigma.e) #measurement errors, condition 1
eij2 <- rnorm(n, 0, sigma.e) #measurement errors, condition 2
dat <- cbind("Level1"=TrueScore+eij1+delta.mu,
             "Level2"=TrueScore+eij2) #wide data format
# dat <- dat * 100
df <- data.frame("ID"=factor(rep(paste0("s",1:n), 2)),
                 "Level"=factor(rep(c("Level1", "Level2"), each=n)),
                 "Response"=c(dat)) #long data format
## The Bayes factors will be computed for the hypothetical data (in the first chunk).
Method: Constructing the Jeffreys-Zellner-Siow (JZS) Bayes factor.
\[\begin{align*} \tag{1} \mathcal{M}_1:\ Y_{ij}&=\mu+\sigma_\epsilon(d_i+b_j)+\epsilon_{ij}\\ \text{versus}\quad\mathcal{M}_0:\ Y_{ij}&=\mu+\sigma_\epsilon b_j+\epsilon_{ij},\qquad\epsilon_{ij}\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,\sigma_\epsilon^2) \text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{2} \pi(\mu,\sigma_\epsilon^2)\propto 1/\ \sigma_\epsilon^{2} \end{equation}\]
\[\begin{equation} \tag{3} d_i^\star\mid g\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g) \end{equation}\]
\[\begin{equation} \tag{4} g\sim\text{Scale-inv-}\chi^2(1,h^2) \end{equation}\]
\[\begin{equation} \tag{5} (d_1^\star,\dotsb,d_{a-1}^\star)=(d_1,\dotsb,d_a)\cdot\mathbf{Q} \end{equation}\]
\[\begin{equation} \tag{6} \mathbf{I}_a-\frac{1}{a}\mathbf{J}_a=\mathbf{Q}\cdot\mathbf{Q}^\top \end{equation}\]
\[\begin{equation} \tag{7} b_j\mid g_b\overset{\text{i.i.d.}}{\sim}\mathcal{N}(0,g_b) \end{equation}\]
\[\begin{equation} \tag{8} g_b\sim\text{Scale-inv-}\chi^2(1,h_b^2) \end{equation}\]
\(\mathbf{Q}\) is an \(a\times(a-1)\) matrix whose columns are the \(a-1\) unit-length eigenvectors corresponding to the nonzero eigenvalues of the left-hand side of (6). For example, the projected main effect is \(d^\star=(d_1-d_2)/\sqrt{2}\) when \(a=2\). In the other direction (given \(d_1+d_2=0\)), \(d_1=d^\star/\sqrt{2}\) and \(d_2=-d^\star/\sqrt{2}\).
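To make (3)-(6) concrete, here is a minimal sketch for \(a=3\): it builds \(\mathbf{Q}\) from the eigendecomposition of the centering matrix, verifies the projection in (5), and checks by simulation that (3)-(4) imply a marginal Cauchy\((0,h)\) prior on each \(d_i^\star\), taking \(h=1/2\) for illustration (the "medium" scale that anovaBF uses for fixed effects).
a <- 3
C <- diag(a) - matrix(1/a, a, a) #left-hand side of (6): I_a - J_a/a
Q <- eigen(C)$vectors[, 1:(a-1)] #unit eigenvectors of the a-1 nonzero (unit) eigenvalues
max(abs(Q %*% t(Q) - C)) #verifies (6): ~0 up to rounding error
d <- c(2, -3, 1) #any main-effect vector summing to zero
d.star <- c(d %*% Q) #projection (5)
c(d.star %*% t(Q)) #recovers d because sum(d) == 0
set.seed(277)
h <- 0.5 #prior scale, for illustration
g <- 1/rgamma(1e5, shape=1/2, rate=h^2/2) #draws from Scale-inv-chi^2(1, h^2) in (4)
d.sim <- rnorm(1e5, 0, sqrt(g)) #draws from (3) given g
quantile(d.sim, c(.25, .75)) #~ +/- h, the quartiles of Cauchy(0, h)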
library(BayesFactor)
set.seed(277)
(JZS_BF <- anovaBF(Response~Level+ID, data=df, whichRandom="ID", progress=F))
## Bayes factor analysis
## --------------
## [1] Level + ID : 35959.93 ±0.56%
##
## Against denominator:
## Response ~ ID
## ---
## Bayes factor type: BFlinearModel, JZS
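The Bayes factor and its proportional error can also be extracted from the object, for later comparison with the BIC-based value below.
extractBF(JZS_BF)[, c("bf", "error")] #numeric Bayes factor and its proportional error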
Note: Fixed effects are assumed. See the discussion here.
Markov chain Monte Carlo (MCMC) diagnostics: trace plots of iteration number versus sampled value for each variable in the chain.
set.seed(277)
draws <- posterior(JZS_BF, iterations=1e5, progress=F)
summary(draws)
##
## Iterations = 1:1e+05
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1e+05
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mu 12.7292 1.9846 0.0062759 0.0062759
## Level-Level1 -1.6542 0.2364 0.0007475 0.0008613
## Level-Level2 0.2538 0.2241 0.0007088 0.0007100
## Level-Level3 1.4004 0.2330 0.0007367 0.0008187
## ID-s1 -0.7238 2.0379 0.0064445 0.0064445
## ID-s10 -3.0369 2.0370 0.0064415 0.0064415
## ID-s2 -5.3524 2.0380 0.0064446 0.0064446
## ID-s3 0.2681 2.0370 0.0064416 0.0064416
## ID-s4 10.5135 2.0384 0.0064461 0.0064461
## ID-s5 5.2273 2.0393 0.0064489 0.0064489
## ID-s6 3.5731 2.0352 0.0064359 0.0064359
## ID-s7 -10.6347 2.0369 0.0064412 0.0064412
## ID-s8 1.9211 2.0361 0.0064386 0.0064386
## ID-s9 -1.7162 2.0395 0.0064495 0.0064495
## sig2 0.7938 0.3294 0.0010416 0.0023032
## g_Level 7.7913 94.4643 0.2987222 0.2987222
## g_ID 56.2794 39.9360 0.1262886 0.2052537
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mu 8.7772 11.4887 12.7270 13.9666 16.6988
## Level-Level1 -2.1042 -1.8100 -1.6599 -1.5046 -1.1664
## Level-Level2 -0.1907 0.1090 0.2547 0.3998 0.6950
## Level-Level3 0.9238 1.2533 1.4041 1.5527 1.8510
## ID-s1 -4.7946 -2.0006 -0.7140 0.5661 3.3221
## ID-s10 -7.1093 -4.3147 -3.0322 -1.7511 0.9941
## ID-s2 -9.4374 -6.6315 -5.3413 -4.0637 -1.3013
## ID-s3 -3.8028 -1.0187 0.2716 1.5472 4.3035
## ID-s4 6.4649 9.2282 10.5097 11.7944 14.6162
## ID-s5 1.1702 3.9469 5.2212 6.5136 9.2945
## ID-s6 -0.4687 2.2866 3.5639 4.8580 7.6215
## ID-s7 -14.6990 -11.9076 -10.6180 -9.3487 -6.6167
## ID-s8 -2.1454 0.6405 1.9223 3.2007 5.9699
## ID-s9 -5.8027 -3.0012 -1.7110 -0.4266 2.3528
## sig2 0.3849 0.5714 0.7223 0.9325 1.6225
## g_Level 0.5096 1.5649 2.9401 6.0788 36.1203
## g_ID 14.7594 31.1880 46.1816 68.9989 157.4876
plot(draws, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
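Because posterior() returns a coda mcmc object, further diagnostics are available beyond trace plots; for example, effective sample sizes for the grand mean and the variance components:
coda::effectiveSize(draws[, c("mu", "sig2", "g_Level", "g_ID")]) #effective sample sizes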
\[\begin{align*} \tag{9} \mathcal{M}_1^*:\ Y_{ij}&=\mu+t_i+\epsilon_{ij}^*\\ \text{versus}\quad\mathcal{M}_0^*:\ Y_{ij}&=\mu+\epsilon_{ij}^*,\qquad\text{ for } i=1,\dotsb,a;\ j=1,\dotsb,n. \end{align*}\]
\[\begin{equation} \tag{10} \mathbf{V}_j\equiv\text{Cov}(\epsilon_{ij}^*)=\sigma^2 \begin{pmatrix} 1 & \rho_{12} & \dotsb & \rho_{1a}\\ \rho_{21} & 1 & \dotsb & \rho_{2a}\\ \vdots & \vdots & \ddots & \vdots\\ \rho_{a1} & \rho_{a2} & \dotsb & 1 \end{pmatrix}_{a\times a} \end{equation}\]
\[\begin{equation} \tag{11} \mathit{BF}_{01}\approx\text{exp}\left\{\frac{\text{BIC}(\mathcal{M}_1^*)-\text{BIC}(\mathcal{M}_0^*)}{2}\right\} \end{equation}\]
In (10), the correlation between any two observations from the same subject \(j\) can differ, so the unstructured matrix has \(1+a(a-1)/2\) parameters: one common variance \(\sigma^2\) plus \(a(a-1)/2\) correlations.
gls_full <- nlme::gls(Response ~ Level, data=df, method="ML", #Generalized Least Squares
                      correlation=nlme::corSymm(form=~1 | ID))
gls_null <- nlme::gls(Response ~ 1, data=df, method="ML",
                      correlation=nlme::corSymm(form=~1 | ID))
exp((BIC(gls_null)-BIC(gls_full))/2) #BF_{10}
## [1] 38787.95
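As a check on (11), the same value can be computed by hand from the log-likelihoods, using BIC \(=-2\log L+k\log N\) with \(N=30\) observations here:
bic_by_hand <- function(fit) { #BIC = -2*logLik + df*log(N)
  ll <- logLik(fit)
  -2*as.numeric(ll) + attr(ll, "df")*log(attr(ll, "nobs"))
}
exp((bic_by_hand(gls_null) - bic_by_hand(gls_full))/2) #matches BF_{10} above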
summary(gls_full)
## Generalized least squares fit by maximum likelihood
## Model: Response ~ Level
## Data: df
## AIC BIC logLik
## 130.8927 140.7011 -58.44634
##
## Correlation Structure: General
## Formula: ~1 | ID
## Parameter estimate(s):
## Correlation:
## 1 2
## 2 0.984
## 3 0.988 0.976
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 11.0 1.8940661 5.807611 0
## LevelLevel2 2.0 0.3338535 5.990652 0
## LevelLevel3 3.2 0.2912425 10.987408 0
##
## Correlation:
## (Intr) LvlLv2
## LevelLevel2 -0.088
## LevelLevel3 -0.077 0.141
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.11185870 -0.49276703 -0.01759882 0.65115643 1.93587047
##
## Residual standard error: 5.682198
## Degrees of freedom: 30 total; 27 residual
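The estimated unstructured matrix \(\mathbf{V}_j\) in (10) can be reconstructed from the fitted object; a minimal sketch (the matrix is the same for every subject):
R.hat <- nlme::corMatrix(gls_full$modelStruct$corStruct)[[1]] #estimated 3x3 correlation matrix
R.hat * gls_full$sigma^2 #implied covariance matrix V_j in (10)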
summary(gls_null)
## Generalized least squares fit by maximum likelihood
## Model: Response ~ 1
## Data: df
## AIC BIC logLik
## 154.8268 161.8328 -72.4134
##
## Correlation Structure: General
## Formula: ~1 | ID
## Parameter estimate(s):
## Correlation:
## 1 2
## 2 0.927
## 3 0.840 0.956
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 12.52879 1.781252 7.033697 0
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.981958122 -0.606647468 -0.004949057 0.725684728 2.143973840
##
## Residual standard error: 5.816868
## Degrees of freedom: 30 total; 29 residual
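For comparison (not part of the analysis above), the same BIC approximation can be computed under a compound-symmetry structure, which replaces the \(a(a-1)/2\) free correlations in (10) with a single common \(\rho\):
gls_full_cs <- nlme::gls(Response ~ Level, data=df, method="ML",
                         correlation=nlme::corCompSymm(form=~1 | ID))
gls_null_cs <- nlme::gls(Response ~ 1, data=df, method="ML",
                         correlation=nlme::corCompSymm(form=~1 | ID))
exp((BIC(gls_null_cs)-BIC(gls_full_cs))/2) #BF_{10} under compound symmetry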