Assume that we have data sampled from a multivariate t-distribution. Note that you can produce a multivariate t-distributed vector by dividing a multivariate normal vector by the square root of a chi-squared random variable divided by its degrees of freedom. See the proof here: https://stats.stackexchange.com/questions/151854/a-normal-divided-by-the-sqrt-chi2s-s-gives-you-a-t-distribution-proof
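As a quick illustration of that construction, here is a minimal sketch (the _demo names are only for this illustration): divide multivariate normal draws, row by row, by the square root of a chi-squared draw over its degrees of freedom and you get heavier-tailed multivariate t draws with the same scale matrix.
nu_demo <- 3
Sigma_demo <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
n_demo <- 5000
z <- mvtnorm::rmvnorm(n_demo, sigma = Sigma_demo)  # multivariate normal, mean zero
w <- rchisq(n_demo, df = nu_demo) / nu_demo        # chi-squared over its df, one draw per row
y_demo <- z / sqrt(w)                              # multivariate t with nu_demo degrees of freedom
# the t draws have noticeably heavier tails than the normal draws
apply(abs(z), 2, max)
apply(abs(y_demo), 2, max)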
For the bivariate t-distribution we use the following parameters:
b <- c(1, 2)
s <- c(0.4, 1)
nu <- 3
# correlations
Rho <- rbind(
c( 1.0, 0.4),
c( 0.4, 1.0)
)
S <- diag(s) %*% Rho %*% diag(s)
# S must be positive definite, i.e. all eigenvalues strictly positive
stopifnot(all(eigen(S)$values>0))
N_obs <- 200
set.seed(5) # used to replicate example
y_norm <- mvtnorm::rmvnorm( N_obs , b, S )
y_t <- mvtnorm::rmvt( N_obs , S , df = nu, delta = b)
# data.table and ggplot2 are used for the plots below
library(data.table)
library(ggplot2)

# rbind on data.tables already returns a data.table
dfig <- rbind(
  data.table(desc = "Normal", y_norm),
  data.table(desc = "t-dist", y_t)
)
ggplot(dfig, aes(x = V1, y = V2)) +
geom_point() +
geom_density_2d(aes(color = after_stat(level)), bins = 5) +
scale_color_viridis_c() +
facet_wrap(~desc)
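With nu = 3 the t draws have covariance nu / (nu - 2) * S, i.e. three times the scale matrix, while the normal draws have covariance S. A quick empirical check on the simulated samples (sample covariances of heavy-tailed data are noisy, so expect only rough agreement):
cov(y_norm)        # should be close to S
cov(y_t)           # should be closer to 3 * S than to S
nu / (nu - 2) * S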
Simulate 4d data.
b <- c(1, 3, 2, 2)
s <- c(0.4, 1, 0.8, 2)
# correlations
Rho <- rbind(
c( 1.0, 0.4, -0.3, -0.1),
c( 0.4, 1.0, 0.7, 0.3),
c(-0.3, 0.7, 1.0, 0.1),
c(-0.1, 0.3, 0.1, 1.0)
)
S <- diag(s) %*% Rho %*% diag(s)
# S must be positive definite, i.e. all eigenvalues strictly positive
stopifnot(all(eigen(S)$values>0))
N_obs <- 200
set.seed(5) # used to replicate example
y <- mvtnorm::rmvt( N_obs , S , df = 3, delta = b)
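The model below is fit to this particular sample, so the posterior summaries are best compared with the sample statistics as well as with the population values used for simulation. A quick look (just a check, not part of the model):
# sample means and correlations for this particular data set
round(colMeans(y), 2)
round(cor(y), 2)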
We model the data along the lines of
\[
\begin{aligned}
\mathbf{y} &\sim \text{MVT}(\nu, \boldsymbol{\mu}, \boldsymbol{\Sigma}) \\
\mu_j &\sim \text{Student-}t(3, 0, 1), \quad j = 1, \dots, 4 \\
\boldsymbol{\Sigma} &= \begin{pmatrix}
\sigma_{1} & 0 & 0 & 0 \\
0 & \sigma_{2} & 0 & 0 \\
0 & 0 & \sigma_{3} & 0 \\
0 & 0 & 0 & \sigma_{4}
\end{pmatrix}
R
\begin{pmatrix}
\sigma_{1} & 0 & 0 & 0 \\
0 & \sigma_{2} & 0 & 0 \\
0 & 0 & \sigma_{3} & 0 \\
0 & 0 & 0 & \sigma_{4}
\end{pmatrix} \\
R &\sim \text{LKJcorr}(2) \\
\sigma_j &\sim \text{Student-}t(3, 0, 1)^{+} \\
\nu &\sim \text{Student-}t(3, 0, 1)^{+}
\end{aligned}
\]
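For intuition on the LKJcorr(2) prior: a standard result for the LKJ distribution is that, under LKJ(eta) on a K x K correlation matrix, each off-diagonal element has a marginal that is Beta(eta + (K - 2)/2, eta + (K - 2)/2) rescaled from (0, 1) to (-1, 1). With K = 4 and eta = 2 that is a rescaled Beta(3, 3), i.e. density proportional to (1 - r^2)^2, which gently shrinks the correlations towards zero. A quick sketch of that implied marginal:
# implied marginal prior for a single correlation under LKJ(2) in 4 dimensions:
# Beta(3, 3) on (0, 1), rescaled to (-1, 1)
curve(dbeta((x + 1) / 2, 3, 3) / 2, from = -1, to = 1,
  xlab = "correlation", ylab = "prior density")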
Jump straight to the Cholesky-factor approach, which is normally a bit quicker than putting the LKJ prior directly on the correlation matrix:
m1 <- cmdstanr::cmdstan_model(paste0(path_pref, "/stan/mvt_01.stan"))
m1
## data {
## int N;
## array[N] vector[4] y;
## }
## parameters {
## // group means
## vector[4] b;
## cholesky_factor_corr[4] LRho;
## vector<lower=0>[4] s;
## real<lower=0> nu; // degrees of freedom must be positive
## }
## transformed parameters{
## }
## model {
## target += lkj_corr_cholesky_lpdf(LRho | 2);
## // student-t(3, 0, 1) priors, chosen for no particular reason
## target += student_t_lpdf(b | 3, 0, 1);
## target += student_t_lpdf(s | 3, 0, 1) - student_t_lccdf(0 | 3, 0, 1);
## target += student_t_lpdf(nu | 3, 0, 1) - student_t_lccdf(0 | 3, 0, 1);
## target += multi_student_t_cholesky_lpdf(y | nu, b, diag_pre_multiply(s, LRho));
## }
## generated quantities{
## matrix[4,4] Rho;
## Rho = multiply_lower_tri_self_transpose(LRho);
## }
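The key line is diag_pre_multiply(s, LRho), which computes diag(s) * LRho. Since LRho * LRho' = Rho, this product is a lower-triangular Cholesky factor of Sigma = diag(s) Rho diag(s), which is what multi_student_t_cholesky_lpdf expects. A quick check of that identity in R, using the simulation values of Rho and s from above:
L_Rho <- t(chol(Rho))   # lower-triangular Cholesky factor of the correlation matrix
# diag(s) %*% L_Rho is then a Cholesky factor of S = diag(s) %*% Rho %*% diag(s)
all.equal(diag(s) %*% L_Rho %*% t(L_Rho) %*% diag(s), S)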
set.seed(2)
lsd <- list(
N = nrow(y),
y = y
)
f1 <- m1$sample(lsd,
  iter_warmup = 1000,
  iter_sampling = 2000,
  parallel_chains = 3,
  chains = 3,
  refresh = 0, show_exceptions = FALSE)
## Running MCMC with 3 parallel chains...
##
## Chain 1 finished in 11.9 seconds.
## Chain 3 finished in 12.1 seconds.
## Chain 2 finished in 12.2 seconds.
##
## All 3 chains finished successfully.
## Mean chain execution time: 12.1 seconds.
## Total execution time: 12.3 seconds.
rbind(b, s)
f1$summary(variables = c("b", "s"))
## [,1] [,2] [,3] [,4]
## b 1.0 3 2.0 2
## s 0.4 1 0.8 2
## # A tibble: 8 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 b[1] 0.937 0.937 0.0322 0.0325 0.884 0.989 1.00 5426. 4387.
## 2 b[2] 3.01 3.01 0.0767 0.0764 2.89 3.14 1.00 4446. 3903.
## 3 b[3] 2.08 2.08 0.0650 0.0647 1.97 2.19 1.00 4716. 4482.
## 4 b[4] 2.19 2.18 0.147 0.145 1.95 2.43 1.00 6240. 4291.
## 5 s[1] 0.401 0.400 0.0280 0.0275 0.357 0.450 1.00 4183. 4484.
## 6 s[2] 0.948 0.946 0.0644 0.0652 0.846 1.06 1.00 3182. 3153.
## 7 s[3] 0.812 0.810 0.0543 0.0542 0.725 0.905 1.00 3492. 3769.
## 8 s[4] 1.83 1.83 0.127 0.126 1.63 2.05 1.00 3820. 4462.
Rho[1,]
f1$summary(variables = c("Rho[1,2]","Rho[1,3]","Rho[1,4]"))
## [1] 1.0 0.4 -0.3 -0.1
## # A tibble: 3 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[1,2] 0.374 0.376 0.0660 0.0681 0.263 0.476 1.00 3689. 3945.
## 2 Rho[1,3] -0.369 -0.371 0.0681 0.0677 -0.479 -0.256 1.00 4147. 3657.
## 3 Rho[1,4] 0.0197 0.0198 0.0785 0.0785 -0.109 0.149 1.00 5303. 4252.
Rho[2,]
f1$summary(variables = c("Rho[2,3]","Rho[2,4]"))
## [1] 0.4 1.0 0.7 0.3
## # A tibble: 2 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[2,3] 0.663 0.665 0.0449 0.0450 0.585 0.732 1.00 5523. 4700.
## 2 Rho[2,4] 0.325 0.328 0.0682 0.0686 0.210 0.435 1.00 6297. 3828.
Rho[3,]
f1$summary(variables = c("Rho[3,4]"))
## [1] -0.3 0.7 1.0 0.1
## # A tibble: 1 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 Rho[3,4] 0.0268 0.0277 0.0769 0.0781 -0.102 0.150 1.00 5217. 4653.
nu
f1$summary(variables = c("nu"))
## [1] 3
## # A tibble: 1 × 10
## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
## <chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1 nu 2.88 2.85 0.397 0.393 2.28 3.59 1.00 5273. 4041.
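As a final check (a sketch, not part of the fit above), plug the element-wise posterior medians back into mvtnorm::rmvt and compare the correlations of the replicated data with those of the observed data. The element-wise median of Rho is symmetric and, with posteriors this tight, should also be positive definite.
b_hat <- f1$summary(variables = "b")$median
s_hat <- f1$summary(variables = "s")$median
Rho_hat <- matrix(f1$summary(variables = "Rho")$median, 4, 4)
nu_hat <- f1$summary(variables = "nu")$median
S_hat <- diag(s_hat) %*% Rho_hat %*% diag(s_hat)
set.seed(9)
y_rep <- mvtnorm::rmvt(nrow(y), S_hat, df = nu_hat, delta = b_hat)
round(cor(y_rep), 2)
round(cor(y), 2)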