Introduction

Assume that we have data sampled from a multivariate t-distribution. Recall that you can produce a multivariate t draw by dividing a (zero-mean) multivariate normal draw by the square root of a chi-squared random variable divided by its degrees of freedom, then adding the location. A proof is given here: https://stats.stackexchange.com/questions/151854/a-normal-divided-by-the-sqrt-chi2s-s-gives-you-a-t-distribution-proof
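As a minimal sketch of that construction (not part of the analysis below; the names S0, y_hand and y_pkg are just for illustration), we can build multivariate t draws by hand and compare them against mvtnorm::rmvt:

# build multivariate t draws by hand as z / sqrt(w / nu),
# with z ~ MVN(0, S0) and w ~ chi-squared(nu)
nu <- 3
S0 <- rbind(
  c(1.0, 0.4),
  c(0.4, 1.0)
)
z <- mvtnorm::rmvnorm(1e5, sigma = S0)
w <- rchisq(1e5, df = nu)
y_hand <- z / sqrt(w / nu)
y_pkg <- mvtnorm::rmvt(1e5, sigma = S0, df = nu)
# the marginal of the first component is a standard t with nu df,
# so both sets of draws should line up with qt(., df = nu)
round(rbind(
  hand   = quantile(y_hand[, 1], c(0.05, 0.5, 0.95)),
  pkg    = quantile(y_pkg[, 1], c(0.05, 0.5, 0.95)),
  theory = qt(c(0.05, 0.5, 0.95), df = nu)
), 2)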

First, simulate bivariate data from a multivariate normal and a multivariate t with the same location and scale, to see how the two compare:

# packages used below (mvtnorm is referenced via ::)
library(data.table)
library(ggplot2)
b <- c(1, 2)
s <- c(0.4, 1)
nu <- 3
# correlations 
Rho <- rbind(
  c( 1.0,  0.4),
  c( 0.4,  1.0)
)
S <- diag(s) %*% Rho %*% diag(s)
# S needs to be positive definite
# For a positive definite matrix, the eigenvalues should be positive.
stopifnot(all(eigen(S)$values>0))
N_obs <- 200
set.seed(5) # used to replicate example
y_norm <- mvtnorm::rmvnorm( N_obs , b, S ) 
y_t <- mvtnorm::rmvt( N_obs , S , df = nu, delta = b) 
dfig <- data.table(
  rbind(
    data.table(desc = "Normal", y_norm),
    data.table(desc = "t-dist", y_t)
  )
)
ggplot(dfig, aes(x = V1, y = V2)) +
  geom_point() +
  geom_density_2d(aes(color = after_stat(level)), bins = 5) +
  scale_color_viridis_c() +
  facet_wrap(~desc)

Now simulate the 4-dimensional data that we will fit the model to:

b <- c(1, 3, 2, 2)
s <- c(0.4, 1, 0.8, 2)
# correlations 
Rho <- rbind(
  c( 1.0,  0.4, -0.3, -0.1),
  c( 0.4,  1.0,  0.7,  0.3),
  c(-0.3,  0.7,  1.0,  0.1),
  c(-0.1,  0.3,  0.1,  1.0)
)
S <- diag(s) %*% Rho %*% diag(s)
# S needs to be positive definite
# For a positive definite matrix, the eigenvalues should be positive.
stopifnot(all(eigen(S)$values>0))
N_obs <- 200
set.seed(5) # used to replicate example
y <- mvtnorm::rmvt( N_obs , S , df = 3, delta = b) 
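A quick aside: S is the scale matrix of the t-distribution, not its covariance. For ν > 2 the covariance is ν/(ν − 2) · Σ, so with ν = 3 the sample covariance of y should be roughly three times S (only a rough check, since with such heavy tails the sample covariance is noisy):

# covariance of a multivariate t is nu / (nu - 2) * S for nu > 2; with nu = 3 that is 3 * S
# (rough check only - the sample covariance is very noisy at nu = 3)
round(3 / (3 - 2) * S, 2)
round(cov(y), 2)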

We model the data along the lines of

\[ \begin{aligned} \mathbf{y} &\sim \text{MVT}(\nu, \boldsymbol{\mu}, \boldsymbol{\Sigma}) \\ \mu_{k} &\sim t\text{-dist}(3, 0, 1) \\ \boldsymbol{\Sigma} &= \begin{pmatrix} \sigma_{1} & 0 & 0 & 0 \\ 0 & \sigma_{2} & 0 & 0 \\ 0 & 0 & \sigma_{3} & 0 \\ 0 & 0 & 0 & \sigma_{4} \\ \end{pmatrix} R \begin{pmatrix} \sigma_{1} & 0 & 0 & 0 \\ 0 & \sigma_{2} & 0 & 0 \\ 0 & 0 & \sigma_{3} & 0 \\ 0 & 0 & 0 & \sigma_{4} \\ \end{pmatrix} \\ R &\sim \text{LKJcorr}(2) \\ \sigma_{k} &\sim t\text{-dist}(3, 0, 1)^{+} \\ \nu &\sim t\text{-dist}(3, 0, 1)^{+} \end{aligned} \]
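To get a feel for the univariate priors (a minimal sketch, not part of the fit; n_prior is just an illustrative name), the t-dist(3, 0, 1) and half-t pieces can be drawn in base R. The LKJcorr(2) prior on R needs a dedicated sampler and is left out here.

# prior sketch: mu_k ~ t(3, 0, 1); sigma_k is the same t folded at zero,
# which is equivalent to truncating it to the positive half-line
n_prior <- 1e4
mu_prior <- rt(n_prior, df = 3)
sigma_prior <- abs(rt(n_prior, df = 3))
round(quantile(sigma_prior, c(0.5, 0.9, 0.99)), 2)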

Prior on the Cholesky factor

We jump straight to the Cholesky factor approach, which is normally a bit quicker than putting the prior directly on the correlation matrix:
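Before compiling the model, a quick check (a sketch using the simulation quantities above) of what the Cholesky parameterisation is doing: if LRho is the lower-triangular Cholesky factor of the correlation matrix, then diag(s) %*% LRho is a Cholesky factor of S, which is exactly what diag_pre_multiply(s, LRho) computes in the Stan model below.

# chol() returns the upper triangle in R, so transpose to get the
# lower-triangular factor that matches Stan's cholesky_factor_corr
LRho <- t(chol(Rho))
L_S <- diag(s) %*% LRho
# multiplying the factor by its transpose recovers S = diag(s) Rho diag(s)
max(abs(L_S %*% t(L_S) - S))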

m1 <- cmdstanr::cmdstan_model(paste0(path_pref, "/stan/mvt_01.stan"))
m1
## data {
##   int N;
##   array[N] vector[4] y;
## }
## parameters {
##   // group means
##   vector[4] b;
##   cholesky_factor_corr[4] LRho; 
##   vector<lower=0>[4] s;
##   real<lower=0> nu; // df must be positive
## }
## transformed parameters{
## }
## model {
##   target += lkj_corr_cholesky_lpdf(LRho | 2);
##   // for no particular reason
##   target += student_t_lpdf(b | 3, 0, 1);
##   target += student_t_lpdf(s | 3, 0, 1) - student_t_lccdf(0 | 3, 0, 1);
##   target += student_t_lpdf(nu | 3, 0, 1) - student_t_lccdf(0 | 3, 0, 1);
##   target += multi_student_t_cholesky_lpdf(y | nu, b, diag_pre_multiply(s, LRho));
## }
## generated quantities{
##   matrix[4,4] Rho;
##   Rho = multiply_lower_tri_self_transpose(LRho);
## }
set.seed(2)
lsd <- list(
  N = nrow(y),
  y = y
)

f1 <- m1$sample(lsd,
    iter_warmup = 1000,
    iter_sampling = 2000,
    parallel_chains = 3,
    chains = 3,
    refresh = 0, show_exceptions = FALSE)
## Running MCMC with 3 parallel chains...
## 
## Chain 1 finished in 11.9 seconds.
## Chain 3 finished in 12.1 seconds.
## Chain 2 finished in 12.2 seconds.
## 
## All 3 chains finished successfully.
## Mean chain execution time: 12.1 seconds.
## Total execution time: 12.3 seconds.
rbind(b, s)
f1$summary(variables = c("b", "s"))
##   [,1] [,2] [,3] [,4]
## b  1.0    3  2.0    2
## s  0.4    1  0.8    2
## # A tibble: 8 × 10
##   variable  mean median     sd    mad    q5   q95  rhat ess_bulk ess_tail
##   <chr>    <num>  <num>  <num>  <num> <num> <num> <num>    <num>    <num>
## 1 b[1]     0.937  0.937 0.0322 0.0325 0.884 0.989  1.00    5426.    4387.
## 2 b[2]     3.01   3.01  0.0767 0.0764 2.89  3.14   1.00    4446.    3903.
## 3 b[3]     2.08   2.08  0.0650 0.0647 1.97  2.19   1.00    4716.    4482.
## 4 b[4]     2.19   2.18  0.147  0.145  1.95  2.43   1.00    6240.    4291.
## 5 s[1]     0.401  0.400 0.0280 0.0275 0.357 0.450  1.00    4183.    4484.
## 6 s[2]     0.948  0.946 0.0644 0.0652 0.846 1.06   1.00    3182.    3153.
## 7 s[3]     0.812  0.810 0.0543 0.0542 0.725 0.905  1.00    3492.    3769.
## 8 s[4]     1.83   1.83  0.127  0.126  1.63  2.05   1.00    3820.    4462.
Rho[1,]
f1$summary(variables = c("Rho[1,2]","Rho[1,3]","Rho[1,4]"))
## [1]  1.0  0.4 -0.3 -0.1
## # A tibble: 3 × 10
##   variable    mean  median     sd    mad     q5    q95  rhat ess_bulk ess_tail
##   <chr>      <num>   <num>  <num>  <num>  <num>  <num> <num>    <num>    <num>
## 1 Rho[1,2]  0.374   0.376  0.0660 0.0681  0.263  0.476  1.00    3689.    3945.
## 2 Rho[1,3] -0.369  -0.371  0.0681 0.0677 -0.479 -0.256  1.00    4147.    3657.
## 3 Rho[1,4]  0.0197  0.0198 0.0785 0.0785 -0.109  0.149  1.00    5303.    4252.
Rho[2,]
f1$summary(variables = c("Rho[2,3]","Rho[2,4]"))
## [1] 0.4 1.0 0.7 0.3
## # A tibble: 2 × 10
##   variable  mean median     sd    mad    q5   q95  rhat ess_bulk ess_tail
##   <chr>    <num>  <num>  <num>  <num> <num> <num> <num>    <num>    <num>
## 1 Rho[2,3] 0.663  0.665 0.0449 0.0450 0.585 0.732  1.00    5523.    4700.
## 2 Rho[2,4] 0.325  0.328 0.0682 0.0686 0.210 0.435  1.00    6297.    3828.
Rho[3,]
f1$summary(variables = c("Rho[3,4]"))
## [1] -0.3  0.7  1.0  0.1
## # A tibble: 1 × 10
##   variable   mean median     sd    mad     q5   q95  rhat ess_bulk ess_tail
##   <chr>     <num>  <num>  <num>  <num>  <num> <num> <num>    <num>    <num>
## 1 Rho[3,4] 0.0268 0.0277 0.0769 0.0781 -0.102 0.150  1.00    5217.    4653.
nu
f1$summary(variables = c("nu"))
## [1] 3
## # A tibble: 1 × 10
##   variable  mean median    sd   mad    q5   q95  rhat ess_bulk ess_tail
##   <chr>    <num>  <num> <num> <num> <num> <num> <num>    <num>    <num>
## 1 nu        2.88   2.85 0.397 0.393  2.28  3.59  1.00    5273.    4041.
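Finally, the whole posterior correlation matrix can be summarised in one call and lined up against the Rho used to simulate the data (a sketch; post_rho is just an illustrative name):

# posterior means of all entries of Rho, reshaped into a 4 x 4 matrix
# (Rho is symmetric, so the fill order of the elements does not matter)
post_rho <- matrix(f1$summary(variables = "Rho")$mean, nrow = 4)
round(post_rho, 2)
Rho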