Assume that we have data sampled from a multivariate normal.
b <- c(1, 3, 2, 2)
s <- c(0.4, 1, 0.8, 2)
# correlations
Rho <- rbind(
c( 1.0, 0.4, -0.3, -0.1),
c( 0.4, 1.0, 0.7, 0.3),
c(-0.3, 0.7, 1.0, 0.1),
c(-0.1, 0.3, 0.1, 1.0)
)
S <- diag(s) %*% Rho %*% diag(s)
# S needs to be positive definite
# For a positive definite matrix, the eigenvalues should be positive.
stopifnot(all(eigen(S)$values>0))
N_obs <- 200
set.seed(5) # used to replicate example
y <- mvtnorm::rmvnorm( N_obs , b , S )
We model the data as
\[ \begin{aligned} \mathbf{y} &\sim MVNormal(\mathbf{\mu}, \mathbf{\Sigma}) \\ \mathbf{\mu} &\sim t-dist(3, 0, 1) \\ \mathbf{\Sigma} &= \begin{pmatrix} \sigma_{\mu_{1}} & 0 & 0 & 0 \\ 0 & \sigma_{\mu_{2}} & 0 & 0 \\ 0 & 0 & \sigma_{\mu_{3}} & 0 \\ 0 & 0 & 0 & \sigma_{\mu_{4}} \\ \end{pmatrix} R \begin{pmatrix} \sigma_{\mu_{1}} & 0 & 0 & 0 \\ 0 & \sigma_{\mu_{2}} & 0 & 0 \\ 0 & 0 & \sigma_{\mu_{3}} & 0 \\ 0 & 0 & 0 & \sigma_{\mu_{4}} \\ \end{pmatrix} \\ R &\sim LKJcorr(2) \\ \sigma &\sim t-dist(3, 0, 1)^+ \end{aligned} \]
In stan, this can be implemented with a prior on the correlation matrix:
m2a <- cmdstanr::cmdstan_model(paste0(path_pref, "/stan/mvn_02a.stan"))
m2a
## data {
## int N;
## array[N] vector[4] y;
## }
## parameters {
## // group means
## vector[4] b;
## corr_matrix[4] Rho;
## vector<lower=0>[4] s;
## }
## transformed parameters{
## }
## model {
## target += lkj_corr_lpdf(Rho | 2);
## // for no particular reason
## target += student_t_lpdf(b | 3, 0, 1);
## target += student_t_lpdf(s | 3, 0, 1) - student_t_lccdf(0 | 3, 0, 1);
## target += multi_normal_lpdf( y | b , quad_form_diag(Rho , s) );
## }
## generated quantities{
## }
Fit the model:
set.seed(2)
lsd <- list(
N = nrow(y),
y = y
)
f2a <- m2a$sample(lsd,
iter_warmup = 1000,
iter_sampling = 2000,
parallel_chains = 3,
chains = 3,
refresh = 0, show_exceptions = F)
## Running MCMC with 3 parallel chains...
##
## Chain 3 finished in 5.3 seconds.
## Chain 2 finished in 5.6 seconds.
## Chain 1 finished in 5.9 seconds.
##
## All 3 chains finished successfully.
## Mean chain execution time: 5.6 seconds.
## Total execution time: 6.0 seconds.
and summarise the parameters:
rbind(b, s)
f2a$summary(variables = c("b", "s"))
## [,1] [,2] [,3] [,4]
## b 1.0 3 2.0 2
## s 0.4 1 0.8 2
## # A tibble: 8 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 b[1] 0.941 0.941 0.0292 0.0293 0.893 0.989 1.00 6061. 4442.
## 2 b[2] 3.03 3.03 0.0717 0.0703 2.91 3.15 1.00 4860. 4219.
## 3 b[3] 2.10 2.10 0.0564 0.0556 2.00 2.19 1.00 5441. 4012.
## 4 b[4] 2.16 2.16 0.134 0.131 1.94 2.38 1.00 6766. 4355.
## 5 s[1] 0.411 0.410 0.0206 0.0203 0.379 0.446 1.00 6368. 4394.
## 6 s[2] 1.02 1.02 0.0508 0.0495 0.940 1.11 1.00 4176. 4058.
## 7 s[3] 0.802 0.800 0.0400 0.0398 0.739 0.872 1.00 4508. 3626.
## 8 s[4] 1.87 1.87 0.0937 0.0929 1.73 2.04 1.00 5493. 4377.
Rho[1,]
f2a$summary(variables = c("Rho[1,2]","Rho[1,3]","Rho[1,4]"))
## [1] 1.0 0.4 -0.3 -0.1
## # A tibble: 3 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[1,2] 0.450 0.452 0.0554 0.0547 0.356 0.538 1.00 4371. 4036.
## 2 Rho[1,3] -0.271 -0.272 0.0653 0.0650 -0.377 -0.161 1.00 4577. 4197.
## 3 Rho[1,4] 0.0205 0.0211 0.0691 0.0698 -0.0934 0.134 1.00 6911. 4406.
Rho[2,]
f2a$summary(variables = c("Rho[2,3]","Rho[2,4]"))
## [1] 0.4 1.0 0.7 0.3
## # A tibble: 2 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[2,3] 0.680 0.682 0.0375 0.0378 0.617 0.739 1.00 5228. 4519.
## 2 Rho[2,4] 0.330 0.331 0.0623 0.0623 0.223 0.428 1.00 6533. 4296.
Rho[3,]
f2a$summary(variables = c("Rho[3,4]"))
## [1] -0.3 0.7 1.0 0.1
## # A tibble: 1 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[3,4] 0.0575 0.0578 0.0697 0.0689 -0.0597 0.172 1.00 6304. 4448.
The other way to implement this is via a prior on the Cholesky factor, which is normally a bit quicker:
m2b <- cmdstanr::cmdstan_model(paste0(path_pref, "/stan/mvn_02b.stan"))
m2b
## data {
## int N;
## array[N] vector[4] y;
## }
## parameters {
## // group means
## vector[4] b;
## cholesky_factor_corr[4] LRho;
## vector<lower=0>[4] s;
## }
## transformed parameters{
## }
## model {
## target += lkj_corr_cholesky_lpdf(LRho | 2);
## // for no particular reason
## target += student_t_lpdf(b | 3, 0, 1);
## target += student_t_lpdf(s | 3, 0, 1) - student_t_lccdf(0 | 3, 0, 1);
## target += multi_normal_cholesky_lpdf( y | b , diag_pre_multiply(s, LRho));
## }
## generated quantities{
## matrix[4,4] Rho;
## Rho = multiply_lower_tri_self_transpose(LRho);
## }
set.seed(2)
lsd <- list(
N = nrow(y),
y = y
)
f2b <- m2b$sample(lsd,
iter_warmup = 1000,
iter_sampling = 2000,
parallel_chains = 3,
chains = 3,
refresh = 0, show_exceptions = F)
## Running MCMC with 3 parallel chains...
##
## Chain 3 finished in 4.2 seconds.
## Chain 2 finished in 4.4 seconds.
## Chain 1 finished in 4.6 seconds.
##
## All 3 chains finished successfully.
## Mean chain execution time: 4.4 seconds.
## Total execution time: 4.7 seconds.
rbind(b, s)
f2b$summary(variables = c("b", "s"))
## [,1] [,2] [,3] [,4]
## b 1.0 3 2.0 2
## s 0.4 1 0.8 2
## # A tibble: 8 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 b[1] 0.941 0.941 0.0288 0.0286 0.893 0.989 1.00 6923. 3561.
## 2 b[2] 3.03 3.03 0.0727 0.0725 2.91 3.15 1.00 5741. 3773.
## 3 b[3] 2.10 2.10 0.0572 0.0579 2.00 2.19 1.00 6209. 4307.
## 4 b[4] 2.17 2.17 0.130 0.128 1.95 2.38 1.00 7716. 4243.
## 5 s[1] 0.411 0.410 0.0214 0.0213 0.378 0.448 1.00 7104. 4337.
## 6 s[2] 1.02 1.02 0.0508 0.0502 0.940 1.11 1.00 4517. 4000.
## 7 s[3] 0.802 0.800 0.0409 0.0406 0.738 0.872 1.00 4296. 4171.
## 8 s[4] 1.88 1.87 0.0936 0.0914 1.73 2.04 1.00 5879. 4799.
Rho[1,]
f2b$summary(variables = c("Rho[1,2]","Rho[1,3]","Rho[1,4]"))
## [1] 1.0 0.4 -0.3 -0.1
## # A tibble: 3 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[1,2] 0.449 0.451 0.0556 0.0549 0.353 0.538 1.00 4319. 3994.
## 2 Rho[1,3] -0.272 -0.274 0.0644 0.0648 -0.376 -0.165 1.00 4864. 4213.
## 3 Rho[1,4] 0.0189 0.0183 0.0680 0.0688 -0.0925 0.129 1.00 6734. 4387.
Rho[2,]
f2b$summary(variables = c("Rho[2,3]","Rho[2,4]"))
## [1] 0.4 1.0 0.7 0.3
## # A tibble: 2 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[2,3] 0.680 0.682 0.0384 0.0374 0.613 0.740 1.00 5711. 4023.
## 2 Rho[2,4] 0.330 0.331 0.0611 0.0625 0.229 0.429 1.00 6987. 4743.
Rho[3,]
f2b$summary(variables = c("Rho[3,4]"))
## [1] -0.3 0.7 1.0 0.1
## # A tibble: 1 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[3,4] 0.0580 0.0574 0.0687 0.0696 -0.0545 0.171 1.00 5818. 4571.
You can always re-write a linear regression as a multivariate normal distribution.
\[ \begin{aligned} \mathbf{y} &\sim MVNormal(\mathbf{\mu}, \mathbf{\Sigma}) \\ \mu_i &\sim \alpha + \beta x_i \\ \mathbf{\Sigma} &= I \sigma^2 \\ \alpha &\sim Normal(0, 1) \\ \beta &\sim Normal(0, 1) \\ \sigma &\sim t-dist(3, 0, 1)^+ \end{aligned} \]
where \(I\) denotes the identity matrix. One reason to formulate the regression like this is because the natural next step is to plug in something other than the identity matrix, e.g. a density kernel as used in a gaussian process.