library(tidyverse)
library(rstan)
## devtools::install_github('jburos/biostan', build_vignettes = TRUE, dependencies = TRUE)
## library(biostan)
set.seed(732565397)
We generate a dataset with clearly separate modes.
data1 <- data_frame(z = 1 + rbinom(n = 82, size = 1, prob = 0.3),
x1 = rnorm(n = length(z), mean = -10, sd = 1),
x2 = rnorm(n = length(z), mean = +8, sd = 2),
x = ifelse(z == 1, x1, x2))
ggplot(data = data1, mapping = aes(x = x)) +
geom_point(y = 0.5) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Set range of values to examine estimated density.
grid_max <- 20
grid_min <- -20
grid_length <- 100
Define some helper functions
print_relevant_pars <- function(fit) {
print(fit, pars = c("mu","sigma_squared","Pi","sigma","lp__"))
}
plot_draws <- function(stan_fit) {
## Note direct access to global variables
draw_data <- tidybayes::tidy_draws(stan_fit) %>%
select(.chain, .iteration, .draw, starts_with("log_f")) %>%
gather(key = key, value = value, starts_with("log_f")) %>%
mutate(key = gsub("log_f|\\[|\\]", "", key) %>% as.integer(),
x = factor(key, labels = seq(from = grid_min, to = grid_max, length.out = grid_length)) %>%
as.character() %>%
as.numeric(),
value = exp(value))
summary_density <- draw_data %>%
group_by(.chain, x) %>%
summarize(value = mean(value))
ggplot(data = summary_density, mapping = aes(x = x, y = value, group = .chain)) +
geom_line(size = 0.5, color = "gray") +
geom_point(data = data1, mapping = aes(x = x, group = NULL), y = 0) +
facet_wrap(~ .chain) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
}
traceplot_all <- function(fit) {
print(traceplot(fit, inc_warmup = TRUE, pars = "mu"))
print(traceplot(fit, inc_warmup = TRUE, pars = "sigma"))
print(traceplot(fit, inc_warmup = TRUE, pars = "Pi"))
print(traceplot(fit, inc_warmup = FALSE, pars = "lp__"))
}
pairs_plot_all <- function(fit) {
pairs(fit, pars = "mu")
pairs(fit, pars = "sigma_squared")
pairs(fit, pars = "Pi")
}
Let \(z_{i} \in \left\{ 1, \dots, H \right\}\) be the cluster membership latent variable and \(\mathbf{z}_{i} = (I(z_{i}=1),\dots,I(z_{i}=H))^{T}\) be the vector indicator version. We have the following model.
\[\begin{align*} y_{i} | (z_{i}, \mu_{1},\dots,\mu_{H}, \sigma^{2}_{1},\dots,\sigma^{2}_{H}) &\sim N(\mu_{z_{i}}, \sigma^{2}_{z_{i}})\\ \mathbf{z}_{i} | \boldsymbol{\pi} &\sim Multinomial(1, \boldsymbol{\pi})\\ \\ p(\mu_{h},\sigma^{2}_{h}) &= p(\mu_{h}|\sigma^{2}_{h})p(\sigma^{2}_{h})\\ &= N(\mu_{h} | m, s^{2}) Inverse-Gamma(\sigma^{2}_{h} | \alpha, \beta)\\ &= \left[ \frac{1}{\sqrt{2\pi s^{2}}} \exp \left( - \frac{(\mu_{h} - m)^{2}}{2 \times s^{2}} \right) \right] \left[ \frac{\beta^{\alpha}}{\Gamma(\alpha)} (\sigma^{2}_{h})^{-\alpha - 1} \exp \left( - \frac{\beta}{\sigma^{2}_{h}} \right) \right]\\ &\text{where}\\ m &= 0\\ s^{2} &= 10^{3}\\ \alpha &= 10^{-3}\\ \beta &= 10^{-3}\\ \\ p(\boldsymbol{\pi}) &\sim Dirichlet \left( \frac{\alpha}{H}, \dots, \frac{\alpha}{H} \right)\\ \end{align*} \]
Summing out the latent categorical variable \(z_{i}\) results in the following (conditioning on parameters suppressed for simplicity) marginal density. Note the cluster membership latent variable \(z_i\) is not measured, thus, it is a discrete parameter. Stan’s Hamiltonian Monte Carlo (HMC) cannot deal with discrete parameters, this marginalization step is required fro Stan. JAGS seems to allow a discrete parameter and accepts the original model above.
\[\begin{align*} p(y) &= \sum^{H}_{z=1} p(y|z) p(z)\\ &= \sum^{H}_{z=1} \pi_{z} p(y|z)\\ &= \sum^{H}_{h=1} \pi_h N(y | \mu_{h}, \sigma^{2}_{h})\\ \end{align*} \]
normal_fixed_mixture_unordered_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_unordered.stan")
## Warning: replacing previous import 'rstan::loo' by 'rstanarm::loo' when loading 'biostan'
biostan::print_stan_code(normal_fixed_mixture_unordered_stan_code, section = NULL)
## data {
## // Hyperparameters
## real alpha;
## real beta;
## real m;
## real s_squared;
## real<lower=0> dirichlet_alpha;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
## // Number of latent clusters
## int<lower=1> H;
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## vector[H] mu;
## // Population variance (a positive real number)
## real<lower=0> sigma_squared[H];
## // Cluster probability
## simplex[H] Pi;
## }
##
## transformed parameters {
## // Population standard deviation (a positive real number)
## real<lower=0> sigma[H];
## // Standard deviation (derived from variance)
## sigma = sqrt(sigma_squared);
## }
##
## model {
## // Temporary vector for loop use. Need to come first before priors.
## real contributions[H];
##
## // Prior part of Bayesian inference
## // All vectorized
## // Mean
## mu ~ normal(m, s);
## // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior
## sigma_squared ~ inv_gamma(alpha, beta);
## // cluster probability vector
## Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H));
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## for (i in 1:n) {
## // Loop over individuals
##
## for (h in 1:H) {
## // Loop over clusters within each individual
## // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h]))
## contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]);
## }
##
## // log(sum(exp(contribution element)))
## target += log_sum_exp(contributions);
##
## }
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real contributions[H];
## real grid_value;
##
## grid_value = grid_min + grid_step * (g - 1);
## for (h in 1:H) {
## contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]);
## }
##
## log_f[g] = log_sum_exp(contributions);
## }
##
## }
##
normal_fixed_mixture_unordered_stan_fit <-
rstan::stan(model_code = normal_fixed_mixture_unordered_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(data1),
y = data1$x,
dirichlet_alpha = 1,
H = 2,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Warning: There were 2303 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_unordered_stan_fit)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] -1.430000e+00 2.60 1.463000e+01 -33.71 -10.16 -1.29 7.73 3.040000e+01 32 1.17
## mu[2] -2.590000e+00 2.97 1.198000e+01 -16.61 -10.19 -5.48 7.57 1.241000e+01 16 1.33
## sigma_squared[1] 3.298667e+303 NaN Inf 0.71 1.04 6.57 87.77 3.125404e+248 NaN NaN
## sigma_squared[2] 6.415190e+304 NaN Inf 0.66 0.94 5.34 14.27 2.130830e+171 NaN NaN
## Pi[1] 4.500000e-01 0.12 3.000000e-01 0.00 0.26 0.42 0.71 1.000000e+00 6 7.39
## Pi[2] 5.500000e-01 0.12 3.000000e-01 0.00 0.29 0.58 0.74 1.000000e+00 6 7.39
## sigma[1] 1.017418e+150 NaN 5.742741e+151 0.84 1.02 2.56 9.37 1.767710e+124 NaN 1.00
## sigma[2] 5.698957e+150 NaN 2.532287e+152 0.81 0.97 2.31 3.78 4.565878e+85 NaN 1.00
## lp__ -2.171300e+02 18.73 4.592000e+01 -297.49 -237.03 -188.72 -187.13 -1.858900e+02 6 31.17
##
## Samples were drawn using NUTS(diag_e) at Sat Dec 1 21:58:36 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot_all(normal_fixed_mixture_unordered_stan_fit)
pairs_plot_all(normal_fixed_mixture_unordered_stan_fit)
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in par(usr = c(usr[1:2], 0, 1.5)): Internal(pretty()): very large range.. corrected
plot_draws(normal_fixed_mixture_unordered_stan_fit)
shinystan::launch_shinystan(normal_fixed_mixture_unordered_stan_fit)
Many divergent transitions were experienced. Rhat statistics are all large. Note the Rhat for log posterior lp__ is also large. In chains 3, 8, 9, and 11, only one of the two clustered remained, resulting in essentially single normal distribution. Once the cluster probability is zero, the mean parameter mu can take any value and give the same likelihood. This is probably the reason for the wide-spread cross appearance of the pairs plot for the mu parameters.
I realized the prior for the cluster probability vector is Dirichlet(alpha / H). That is, in this case Dirichlet(0.5, 0.5), which gives a lot of mass to regions that collapses either one of the two clusters.
data_frame(x = seq(from = 0, to = 1, by = 0.01),
y = dbeta(x = x, shape1 = 0.5, shape2 = 0.5)) %>%
filter(y < Inf) %>%
ggplot(mapping = aes(x = x, y = y)) +
geom_path() +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
normal_fixed_mixture_unordered_stan_fit2 <-
rstan::stan(model_code = normal_fixed_mixture_unordered_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(data1),
y = data1$x,
dirichlet_alpha = 3,
H = 2,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Warning: There were 625 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_unordered_stan_fit2)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] -1.760000e+00 3.36 1.236000e+01 -15.21 -10.20 -9.99 7.77 1.695000e+01 14 1.41
## mu[2] -7.700000e-01 3.53 8.670000e+00 -10.38 -10.15 1.71 7.77 8.620000e+00 6 19.05
## sigma_squared[1] 2.596583e+304 NaN Inf 0.69 0.92 1.85 7.16 2.680406e+196 NaN NaN
## sigma_squared[2] 9.880000e+00 7.74 1.940000e+01 0.70 0.98 5.00 7.47 7.751000e+01 6 4.90
## Pi[1] 4.800000e-01 0.10 2.400000e-01 0.01 0.29 0.48 0.70 7.800000e-01 6 5.16
## Pi[2] 5.200000e-01 0.10 2.400000e-01 0.22 0.30 0.52 0.71 9.900000e-01 6 5.16
## sigma[1] 4.765329e+150 NaN 1.610754e+152 0.83 0.96 1.36 2.68 1.619936e+98 NaN 1.00
## sigma[2] 2.410000e+00 0.81 2.020000e+00 0.84 0.99 2.24 2.73 8.800000e+00 6 5.70
## lp__ -1.987000e+02 12.49 3.065000e+01 -300.65 -190.81 -189.32 -188.39 -1.873800e+02 6 19.81
##
## Samples were drawn using NUTS(diag_e) at Sat Dec 1 21:59:02 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot_all(normal_fixed_mixture_unordered_stan_fit2)
pairs_plot_all(normal_fixed_mixture_unordered_stan_fit2)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
plot_draws(normal_fixed_mixture_unordered_stan_fit2)
shinystan::launch_shinystan(normal_fixed_mixture_unordered_stan_fit2)
I changed the prior to Dirichlet(1.5, 1.5) to encode the prior knowledge that we likely have two clusters. Only chain 12 resulted in one cluster (cluster 2 collapsed). Chain 1 assigned 1 to the cluster with mean 8 and 30%, whereas chain 11(?) assigned 2 to the cluster with mean 8 and 30%. That is, label switching. These two are effectively the same model with label permutation, thus, every chain except chain 12 have converged to the same log posterior density trajectory and the resulting density estimates have the same bimodal shape.
data_frame(x = seq(from = 0, to = 1, by = 0.01),
y = dbeta(x = x, shape1 = 1.5, shape2 = 1.5)) %>%
filter(y < Inf) %>%
ggplot(mapping = aes(x = x, y = y)) +
geom_path() +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
normal_fixed_mixture_unordered_stan_fit3 <-
rstan::stan(model_code = normal_fixed_mixture_unordered_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(data1),
y = data1$x,
dirichlet_alpha = 10,
H = 2,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
print_relevant_pars(normal_fixed_mixture_unordered_stan_fit3)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] -5.69 3.18 7.79 -10.42 -10.24 -10.13 -5.76 8.46 6 27.92
## mu[2] 3.28 3.17 7.79 -10.34 1.85 7.55 8.00 8.77 6 16.78
## sigma_squared[1] 2.46 1.07 2.86 0.66 0.85 1.00 2.10 9.93 7 2.57
## sigma_squared[2] 5.46 1.07 3.26 0.73 2.52 5.74 7.46 12.02 9 1.69
## Pi[1] 0.59 0.07 0.17 0.25 0.50 0.66 0.70 0.77 7 3.53
## Pi[2] 0.41 0.07 0.17 0.23 0.30 0.34 0.50 0.75 7 3.53
## sigma[1] 1.38 0.29 0.75 0.81 0.92 1.00 1.45 3.15 7 3.51
## sigma[2] 2.20 0.29 0.79 0.86 1.58 2.40 2.73 3.47 7 2.28
## lp__ -194.91 0.02 1.62 -198.98 -195.73 -194.59 -193.72 -192.76 5362 1.00
##
## Samples were drawn using NUTS(diag_e) at Sat Dec 1 21:59:20 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot_all(normal_fixed_mixture_unordered_stan_fit3)
pairs_plot_all(normal_fixed_mixture_unordered_stan_fit3)
plot_draws(normal_fixed_mixture_unordered_stan_fit3)
shinystan::launch_shinystan(normal_fixed_mixture_unordered_stan_fit3)
We can encode a stronger brief about the existence of both clusters with Dirichlet(5,5). No divergent transitions were experienced. Rhat statistics still look large, but the one for the log posterior density is 1.00. For the cluster probability parameter Pi, we clearly see that the only problem left is the label switching. As a result, the resulting density estimates all have the same bimodal shape.
data_frame(x = seq(from = 0, to = 1, by = 0.01),
y = dbeta(x = x, shape1 = 5, shape2 = 5)) %>%
filter(y < Inf) %>%
ggplot(mapping = aes(x = x, y = y)) +
geom_path() +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
normal_fixed_mixture_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture.stan")
biostan::print_stan_code(normal_fixed_mixture_stan_code, section = NULL)
## data {
## // Hyperparameters
## real alpha;
## real beta;
## real m;
## real s_squared;
## real<lower=0> dirichlet_alpha;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
## // Number of latent clusters
## int<lower=1> H;
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## ordered[H] mu;
## // Population variance (a positive real number)
## real<lower=0> sigma_squared[H];
## // Cluster probability
## simplex[H] Pi;
## }
##
## transformed parameters {
## // Population standard deviation (a positive real number)
## real<lower=0> sigma[H];
## // Standard deviation (derived from variance)
## sigma = sqrt(sigma_squared);
## }
##
## model {
## // Temporary vector for loop use. Need to come first before priors.
## real contributions[H];
##
## // Prior part of Bayesian inference
## // All vectorized
## // Mean
## mu ~ normal(m, s);
## // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior
## sigma_squared ~ inv_gamma(alpha, beta);
## // cluster probability vector
## Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H));
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## for (i in 1:n) {
## // Loop over individuals
##
## for (h in 1:H) {
## // Loop over clusters within each individual
## // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h]))
## contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]);
## }
##
## // log(sum(exp(contribution element)))
## target += log_sum_exp(contributions);
##
## }
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real contributions[H];
## real grid_value;
##
## grid_value = grid_min + grid_step * (g - 1);
## for (h in 1:H) {
## contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]);
## }
##
## log_f[g] = log_sum_exp(contributions);
## }
##
## }
##
normal_fixed_mixture_ordered_stan_fit <-
rstan::stan(model_code = normal_fixed_mixture_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(data1),
y = data1$x,
dirichlet_alpha = 10,
H = 2,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
print_relevant_pars(normal_fixed_mixture_ordered_stan_fit)
## Inference for Stan model: 1a37d3991c5cd122dafa78b99a6fc985.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] -10.18 0.00 0.13 -10.44 -10.27 -10.19 -10.10 -9.93 13363 1
## mu[2] 7.78 0.00 0.53 6.74 7.43 7.77 8.13 8.85 12966 1
## sigma_squared[1] 0.95 0.00 0.18 0.66 0.82 0.92 1.05 1.37 12262 1
## sigma_squared[2] 6.98 0.02 2.26 3.84 5.42 6.58 8.09 12.56 11352 1
## Pi[1] 0.69 0.00 0.05 0.59 0.65 0.69 0.72 0.78 15323 1
## Pi[2] 0.31 0.00 0.05 0.22 0.28 0.31 0.35 0.41 15323 1
## sigma[1] 0.97 0.00 0.09 0.81 0.90 0.96 1.03 1.17 12726 1
## sigma[2] 2.61 0.00 0.40 1.96 2.33 2.56 2.84 3.54 12483 1
## lp__ -191.98 0.02 1.62 -196.06 -192.81 -191.62 -190.80 -189.88 5965 1
##
## Samples were drawn using NUTS(diag_e) at Sat Dec 1 22:00:18 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
traceplot_all(normal_fixed_mixture_ordered_stan_fit)
pairs_plot_all(normal_fixed_mixture_ordered_stan_fit)
plot_draws(normal_fixed_mixture_ordered_stan_fit)
shinystan::launch_shinystan(normal_fixed_mixture_unordered_stan_fit3)
If the only problem is label switching, we can solve it by ordering the prior on mu with probability 1. All Rhat statistics are 1 now and pairs plots appear better. The density estimates themselves do not change.
print(sessionInfo())
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 rstan_2.18.1 StanHeaders_2.18.0 forcats_0.3.0 stringr_1.3.1
## [6] dplyr_0.7.7 purrr_0.2.5 readr_1.1.1 tidyr_0.8.2 tibble_1.4.2
## [11] ggplot2_3.1.0 tidyverse_1.2.1 doRNG_1.7.1 rngtools_1.3.1 pkgmaker_0.27
## [16] registry_0.5 doParallel_1.0.14 iterators_1.0.10 foreach_1.4.4 knitr_1.20
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 colorspace_1.3-2 ggridges_0.5.1 rsconnect_0.8.8
## [5] rprojroot_1.3-2 ggstance_0.3.1 markdown_0.8 base64enc_0.1-3
## [9] rstudioapi_0.8 svUnit_0.7-12 DT_0.4 lubridate_1.7.4
## [13] xml2_1.2.0 codetools_0.2-15 splines_3.5.1 shinythemes_1.1.1
## [17] bayesplot_1.6.0 jsonlite_1.5 nloptr_1.2.1 broom_0.5.0
## [21] shiny_1.1.0 compiler_3.5.1 httr_1.3.1 backports_1.1.2
## [25] assertthat_0.2.0 Matrix_1.2-14 lazyeval_0.2.1 cli_1.0.1
## [29] later_0.7.5 htmltools_0.3.6 prettyunits_1.0.2 tools_3.5.1
## [33] igraph_1.2.2 coda_0.19-2 gtable_0.2.0 glue_1.3.0
## [37] reshape2_1.4.3 Rcpp_0.12.19 cellranger_1.1.0 nlme_3.1-137
## [41] crosstalk_1.0.0 ps_1.2.0 lme4_1.1-18-1 rvest_0.3.2
## [45] mime_0.6 miniUI_0.1.1.1 tidybayes_1.0.3 gtools_3.8.1
## [49] MASS_7.3-51 zoo_1.8-4 scales_1.0.0 rstanarm_2.18.1
## [53] colourpicker_1.0 hms_0.4.2 promises_1.0.1 inline_0.3.15
## [57] shinystan_2.5.0 yaml_2.2.0 gridExtra_2.3 loo_2.0.0
## [61] stringi_1.2.4 dygraphs_1.1.1.6 pkgbuild_1.0.2 bibtex_0.4.2
## [65] rlang_0.3.0.1 pkgconfig_2.0.2 matrixStats_0.54.0 evaluate_0.12
## [69] lattice_0.20-35 bindr_0.1.1 splines2_0.2.8 rstantools_1.5.1
## [73] htmlwidgets_1.3 labeling_0.3 processx_3.2.0 tidyselect_0.2.5
## [77] biostan_0.1.0 plyr_1.8.4 magrittr_1.5 R6_2.3.0
## [81] pillar_1.3.0 haven_1.1.2 withr_2.1.2 xts_0.11-1
## [85] survival_2.43-1 modelr_0.1.2 crayon_1.3.4 arrayhelpers_1.0-20160527
## [89] KernSmooth_2.23-15 rmarkdown_1.10 grid_3.5.1 readxl_1.1.0
## [93] callr_3.0.0 threejs_0.3.1 digest_0.6.18 xtable_1.8-3
## [97] httpuv_1.4.5 stats4_3.5.1 munsell_0.5.0 shinyjs_1.0
## Record execution time and multicore use
end_time <- Sys.time()
diff_time <- difftime(end_time, start_time, units = "auto")
cat("Started ", as.character(start_time), "\n",
"Finished ", as.character(end_time), "\n",
"Time difference of ", diff_time, " ", attr(diff_time, "units"), "\n",
"Used ", foreach::getDoParWorkers(), " cores\n",
"Used ", foreach::getDoParName(), " as backend\n",
sep = "")
## Started 2018-12-01 21:57:52
## Finished 2018-12-01 22:00:32
## Time difference of 2.657673 mins
## Used 12 cores
## Used doParallelMC as backend