## ### Using 12 cores
## ### Using doParallelMC as backend

About

This document is based on the lecture “Bayesian Thinking: Fundamentals, Computation, and Multilevel Modeling” by Dr. Jim Albert (http://www-math.bgsu.edu/~albert/) held at Harvard T.H. Chan School of Public Health on Oct 1-2, 2018.

References

Load packages

library(tidyverse)
## devtools::install_github("rmcelreath/rethinking")
library(rethinking)
library(shinystan)
library(tidybayes)

set.seed(41245674)

Prior specification

\(f(y) = \int f(y|\mu,\sigma) g_{1}(\mu) g_{2}(\sigma) \text{d}\mu \text{d}\sigma\)

n_prior_sample <- 10^4
mu_mean  <- 178
mu_sd <- 20
sigma_min <- 0
sigma_max <- 50
## Independence assumed. OK to sample separately.
prior_sample <- data_frame(mu = rnorm(n_prior_sample, mu_mean, mu_sd),
                           sigma = runif(n_prior_sample, sigma_min, sigma_max)) %>%
    ## Sample from the prior distribution of heights.
    mutate(prior_y = rnorm(n_prior_sample, mu, sigma))
prior_sample
## # A tibble: 10,000 x 3
##       mu  sigma prior_y
##    <dbl>  <dbl>   <dbl>
##  1  220. 48.3      289.
##  2  175. 32.3      136.
##  3  147.  7.21     151.
##  4  177. 13.4      192.
##  5  192. 21.0      175.
##  6  196. 15.5      191.
##  7  213. 22.2      180.
##  8  153.  1.16     154.
##  9  179.  3.48     179.
## 10  185.  0.819    185.
## # ... with 9,990 more rows
## Plot
ggplot(data = prior_sample, mapping = aes(x = prior_y)) +
    geom_density() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

One may argue there is too much probability assigned to < 100cm and > 250cm, meaning the stated prior beliefs on the parameters may be somewhat unrealistic. Here we will go ahead and see how the prior beliefs become modified by data.

In the following a different prior was used due to convergence issues.

n_prior_sample <- 10^4
mu_mean  <- 178
mu_sd <- 20
sigma_exp <- 25
## Independence assumed. OK to sample separately.
prior_sample <- data_frame(mu = rnorm(n_prior_sample, mu_mean, mu_sd),
                           sigma = rexp(n_prior_sample, sigma_exp)) %>%
    ## Sample from the prior distribution of heights.
    mutate(prior_y = rnorm(n_prior_sample, mu, sigma))
prior_sample
## # A tibble: 10,000 x 3
##       mu    sigma prior_y
##    <dbl>    <dbl>   <dbl>
##  1  158. 0.125       158.
##  2  181. 0.0302      181.
##  3  162. 0.0157      162.
##  4  177. 0.00496     177.
##  5  178. 0.0283      178.
##  6  196. 0.00489     196.
##  7  162. 0.000210    162.
##  8  178. 0.230       178.
##  9  208. 0.0712      208.
## 10  182. 0.112       182.
## # ... with 9,990 more rows
## Plot
ggplot(data = prior_sample, mapping = aes(x = prior_y)) +
    geom_density() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

This looks like a more realistic prior belief about the distribution of heights.

Load data

We will use height data from 352 adult individuals. No covariates are considered.

data(Howell1, package = "rethinking")
Howell1 <- Howell1 %>%
    as_data_frame() %>%
    filter(age >= 18)
Howell1
## # A tibble: 352 x 4
##    height weight   age  male
##     <dbl>  <dbl> <dbl> <int>
##  1   152.   47.8    63     1
##  2   140.   36.5    63     0
##  3   137.   31.9    65     0
##  4   157.   53.0    41     1
##  5   145.   41.3    51     0
##  6   164.   63.0    35     1
##  7   149.   38.2    32     0
##  8   169.   55.5    27     1
##  9   148.   34.9    19     0
## 10   165.   54.5    54     1
## # ... with 342 more rows

Stan model

The model stated above can be coded as follows in stan.

print(system("cat ./height.stan", intern = TRUE), quote = FALSE)
##  [1] data {                                                             
##  [2]     /* Hyperparameters are given as data */                        
##  [3]     real mu_mean;                                                  
##  [4]     real mu_sd;                                                    
##  [5]     real<lower=0> sigma_exp;                                       
##  [6]                                                                    
##  [7]     /* Define variables in data */                                 
##  [8]     /* Number of observations (an integer) */                      
##  [9]     int<lower=0> n;                                                
## [10]     /* Outcome (a real vector of length n) */                      
## [11]     real y[n];                                                     
## [12] }                                                                  
## [13]                                                                    
## [14]                                                                    
## [15] parameters {                                                       
## [16]     /* Define parameters to estimate */                            
## [17]     /* Population mean (a real number) */                          
## [18]     real mu;                                                       
## [19]     /* Population SD (a positive real number) */                   
## [20]     real<lower=0> sigma;                                           
## [21] }                                                                  
## [22]                                                                    
## [23]                                                                    
## [24] model {                                                            
## [25]     /* Prior part of Bayesian inference */                         
## [26]     /* Flat prior for mu (no need to specify if non-informative) */
## [27]     mu ~ normal(mu_mean, mu_sd);                                   
## [28]     sigma ~ exponential(sigma_exp);                                
## [29]                                                                    
## [30]                                                                    
## [31]     /* Likelihood part of Bayesian inference */                    
## [32]     /* Outcome model N(mu, sigma) */                               
## [33]     y ~ normal(mu, sigma);                                         
## [34] }

Fitting can be conducted using the stan function in the rstan package.

fit <- rstan::stan(file = "./height.stan",
                   data = list(n = nrow(Howell1),
                               y = Howell1$height,
                               ## Hyperparameter values
                               mu_mean = 178,
                               mu_sd = 20,
                               sigma_exp = 25),
            iter = 1000, chains = 4)
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 1).
## 
## Gradient evaluation took 4.7e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.47 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.07397 seconds (Warm-up)
##                0.056313 seconds (Sampling)
##                0.130283 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 2).
## 
## Gradient evaluation took 2.9e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.109342 seconds (Warm-up)
##                0.107164 seconds (Sampling)
##                0.216506 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 3).
## 
## Gradient evaluation took 2.4e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.067465 seconds (Warm-up)
##                0.060244 seconds (Sampling)
##                0.127709 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 4).
## 
## Gradient evaluation took 2.4e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.090759 seconds (Warm-up)
##                0.082279 seconds (Sampling)
##                0.173038 seconds (Total)

The shinystan package provides interactive diagnostics (not run here).

shinystan::launch_shinystan(fit)

Show results. The effective sample size increased a lot after changing the prior from Uniform(0,50) to Exponential(25).

fit
## Inference for Stan model: height.
## 4 chains, each with iter=1000; warmup=500; thin=1; 
## post-warmup draws per chain=500, total post-warmup draws=2000.
## 
##           mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
## mu      154.62    0.01 0.35   153.93   154.37   154.62   154.84   155.32  1312 1.00
## sigma     6.43    0.00 0.18     6.08     6.30     6.42     6.55     6.80  1432 1.00
## lp__  -1070.10    0.03 1.04 -1072.87 -1070.49 -1069.78 -1069.35 -1069.10   884 1.01
## 
## Samples were drawn using NUTS(diag_e) at Sat Oct  6 05:23:32 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Show the summary results.

summary(fit)
## $summary
##               mean     se_mean        sd         2.5%          25%          50%          75%        97.5%     n_eff
## mu      154.616041 0.009766458 0.3536945   153.933597   154.368782   154.615028   154.838072   155.315303 1311.5428
## sigma     6.428279 0.004842861 0.1832864     6.083154     6.304286     6.422239     6.548656     6.797379 1432.3744
## lp__  -1070.095105 0.034902857 1.0375120 -1072.867126 -1070.490223 -1069.779194 -1069.350191 -1069.101708  883.6174
##            Rhat
## mu    1.0022927
## sigma 0.9998109
## lp__  1.0059778
## 
## $c_summary
## , , chains = chain:1
## 
##          stats
## parameter         mean        sd         2.5%         25%          50%          75%        97.5%
##     mu      154.617513 0.3631546   153.919473   154.36201   154.609607   154.852119   155.319546
##     sigma     6.436915 0.1870575     6.091419     6.30579     6.430188     6.555873     6.799664
##     lp__  -1070.140110 1.0256265 -1073.018097 -1070.52773 -1069.831417 -1069.407504 -1069.100218
## 
## , , chains = chain:2
## 
##          stats
## parameter         mean        sd         2.5%          25%          50%          75%        97.5%
##     mu      154.614191 0.3463174   153.978934   154.357600   154.635913   154.840282   155.305731
##     sigma     6.428466 0.1822258     6.112142     6.293464     6.420475     6.550305     6.815281
##     lp__  -1070.065843 0.9694640 -1072.662628 -1070.434190 -1069.813109 -1069.353337 -1069.115944
## 
## , , chains = chain:3
## 
##          stats
## parameter         mean        sd         2.5%          25%         50%          75%        97.5%
##     mu      154.612200 0.3330706   153.948240   154.388763   154.60740   154.817022   155.282215
##     sigma     6.422648 0.1795321     6.072934     6.311886     6.40535     6.544497     6.795871
##     lp__  -1070.015628 0.9989886 -1072.613160 -1070.399933 -1069.66202 -1069.304923 -1069.100123
## 
## , , chains = chain:4
## 
##          stats
## parameter         mean        sd         2.5%          25%          50%          75%        97.5%
##     mu      154.620258 0.3719678   153.896870   154.372740   154.618883   154.848457   155.361143
##     sigma     6.425089 0.1844787     6.082604     6.299737     6.424582     6.543414     6.777316
##     lp__  -1070.158840 1.1441854 -1073.229305 -1070.606984 -1069.826401 -1069.364397 -1069.101971

Show traceplots.

rstan::traceplot(fit)

Get posterior values.

df_fit <- tidybayes::tidy_draws(fit)
df_fit
## # A tibble: 2,000 x 6
##    .chain .iteration .draw    mu sigma   lp__
##     <int>      <int> <int> <dbl> <dbl>  <dbl>
##  1      1          1     1  155.  6.67 -1070.
##  2      1          2     2  155.  6.07 -1071.
##  3      1          3     3  155.  6.77 -1071.
##  4      1          4     4  155.  6.70 -1070.
##  5      1          5     5  155.  6.64 -1070.
##  6      1          6     6  154.  6.61 -1070.
##  7      1          7     7  154.  6.23 -1070.
##  8      1          8     8  154.  6.24 -1070.
##  9      1          9     9  155.  6.43 -1069.
## 10      1         10    10  154.  6.43 -1069.
## # ... with 1,990 more rows

Show a 2D traceplot.

ggplot(data = df_fit, mapping = aes(x = mu, y = sigma, group = .chain, color = factor(.chain))) +
    geom_path(size = 0.1) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

Posterior distribution of heights. Our unimodal model seems somewhat different from the observed data.

df_fit %>%
    mutate(height = rnorm(length(mu), mu, sigma)) %>%
    ggplot(mapping = aes(x = height)) +
    geom_density() +
    geom_density(data = Howell1, color = "red") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

By the design of the model (overall mean only), the posterior predictive distribution is unimodal and the fit appears poor.

Posterior probability of \(\mu > 155\)

df_fit %>%
    summarize(prob_gr_155 = mean(mu > 155))
## # A tibble: 1 x 1
##   prob_gr_155
##         <dbl>
## 1       0.138
## Posterior intervals and point estimates
stan_plot(fit, pars = c("mu","sigma"))

## Traceplots
stan_trace(fit, pars = c("mu","sigma"))

## Histograms
stan_hist(fit, pars = c("mu","sigma"))

## Kernel density estimates
stan_dens(fit, pars = c("mu","sigma"))

## Scatterplots
stan_scat(fit, pars = c("mu","sigma"))

## Diagnostics for Hamiltonian Monte Carlo and the No-U-Turn Sampler
stan_diag(fit)

## Rhat
stan_rhat(fit)

## Ratio of effective sample size to total posterior sample size
stan_ess(fit)

## Ratio of Monte Carlo standard error to posterior standard deviation
stan_mcse(fit)

## Autocorrelation
stan_ac(fit)

rstanarm

A wrapper package rstanarm can simplify coding.

library(rstanarm)
fit_rstanarm <- rstanarm::stan_glm(formula = height ~ 1,
                                   family = gaussian(),
                                   data = Howell1,
                                   prior_intercept = normal(178, 20, autoscale = FALSE),
                                   ## Uniform is not supported
                                   prior_aux = exponential(25))

The underlying stan code can be extracted, but it is very complicated.

cat(rstan::get_stancode(fit_rstanarm$stanfit))
## #include /pre/Columbia_copyright.stan
## #include /pre/license.stan
## 
## // GLM for a Gaussian, Gamma, inverse Gaussian, or Beta outcome
## functions {
## #include /functions/common_functions.stan
## #include /functions/continuous_likelihoods.stan
## #include /functions/SSfunctions.stan
## 
##   /** 
##   * test function for csr_matrix_times_vector
##   *
##   * @param m Integer number of rows
##   * @param n Integer number of columns
##   * @param w Vector (see reference manual)
##   * @param v Integer array (see reference manual)
##   * @param u Integer array (see reference manual)
##   * @param b Vector that is multiplied from the left by the CSR matrix
##   * @return A vector that is the product of the CSR matrix and b
##   */
##   vector test_csr_matrix_times_vector(int m, int n, vector w, 
##                                       int[] v, int[] u, vector b) {
##     return csr_matrix_times_vector(m, n, w, v, u, b); 
##   }
## }
## data {
##   // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x
## #include /data/NKX.stan
##   int<lower=0> len_y;      // length of y
##   real lb_y; // lower bound on y
##   real<lower=lb_y> ub_y; // upper bound on y
##   vector<lower=lb_y, upper=ub_y>[len_y] y; // continuous outcome
##   int<lower=1,upper=4> family; // 1 gaussian, 2 gamma, 3 inv-gaussian, 4 beta
##   // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept
## #include /data/data_glm.stan
##   // declares has_weights, weights, has_offset, offset
## #include /data/weights_offset.stan
##   // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux
## #include /data/hyperparameters.stan
##   // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization
## #include /data/glmer_stuff.stan
##   // declares num_not_zero, w, v, u
## #include /data/glmer_stuff2.stan
## #include /data/data_betareg.stan
## 
##   int<lower=0,upper=10> SSfun; // nonlinear function indicator, 0 for identity
##   vector[SSfun > 0  ? len_y : 0] input;
##   vector[SSfun == 5 ? len_y : 0] Dose;
## }
## transformed data {
##   vector[family == 3 ? len_y : 0] sqrt_y;
##   vector[family == 3 ? len_y : 0] log_y;
##   real sum_log_y = family == 1 ? not_a_number() : sum(log(y));
##   int<lower=1> V[special_case ? t : 0, len_y] = make_V(len_y, special_case ? t : 0, v);
##   int<lower=0> hs_z;                  // for tdata_betareg.stan
##   // defines hs, len_z_T, len_var_group, delta, is_continuous, pos
## #include /tdata/tdata_glm.stan
##   // defines hs_z
## #include /tdata/tdata_betareg.stan
##   is_continuous = 1;
## 
##   if (family == 3) {
##     sqrt_y = sqrt(y);
##     log_y = log(y);
##   }
## }
## parameters {
##   real<lower=make_lower(family, link),upper=make_upper(family,link)> gamma[has_intercept];
##   // declares z_beta, global, local, z_b, z_T, rho, zeta, tau
## #include /parameters/parameters_glm.stan
##   real<lower=0> aux_unscaled; // interpretation depends on family!
## #include /parameters/parameters_betareg.stan
## }
## transformed parameters {
##   // aux has to be defined first in the hs case
##   real aux = prior_dist_for_aux == 0 ? aux_unscaled : (prior_dist_for_aux <= 2 ? 
##              prior_scale_for_aux * aux_unscaled + prior_mean_for_aux :
##              prior_scale_for_aux * aux_unscaled);
## 
##   vector[z_dim] omega; // used in tparameters_betareg.stan
##   // defines beta, b, theta_L
## #include /tparameters/tparameters_glm.stan
## #include /tparameters/tparameters_betareg.stan
##   
##   if (prior_dist_for_aux == 0) // none
##     aux = aux_unscaled;
##   else {
##     aux = prior_scale_for_aux * aux_unscaled;
##     if (prior_dist_for_aux <= 2) // normal or student_t
##       aux = aux + prior_mean_for_aux;
##   }
## 
##   if (t > 0) {
##     if (special_case == 1) {
##       int start = 1;
##       theta_L = scale .* tau * aux;
##       if (t == 1) b = theta_L[1] * z_b;
##       else for (i in 1:t) {
##         int end = start + l[i] - 1;
##         b[start:end] = theta_L[i] * z_b[start:end];
##         start = end + 1;
##       }
##     }
##     else {
##       theta_L = make_theta_L(len_theta_L, p, 
##                              aux, tau, scale, zeta, rho, z_T);
##       b = make_b(z_b, theta_L, p, l);
##     }
##   }
## }
## model {
##   vector[N] eta_z; // beta regression linear predictor for phi
## #include /model/make_eta.stan
##   if (t > 0) {
## #include /model/eta_add_Zb.stan
##   }
##   if (has_intercept == 1) {
##     if ((family == 1 || link == 2) || (family == 4 && link != 5)) eta = eta + gamma[1];
##     else if (family == 4 && link == 5) eta = eta - max(eta) + gamma[1];
##     else eta = eta - min(eta) + gamma[1];
##   }
##   else {
## #include /model/eta_no_intercept.stan
##   }
## 
##   if (SSfun > 0) { // nlmer
##     matrix[len_y, K] P;
##     P = reshape_vec(eta, len_y, K);
##     if (SSfun < 5) {
##       if (SSfun <= 2) {
##         if (SSfun == 1) target += normal_lpdf(y | SS_asymp(input, P), aux);
##         else target += normal_lpdf(y | SS_asympOff(input, P), aux);
##       }
##       else if (SSfun == 3) target += normal_lpdf(y | SS_asympOrig(input, P), aux);
##       else {
##         for (i in 1:len_y) P[i,1] = P[i,1] + exp(P[i,3]); // ordering constraint
##         target += normal_lpdf(y | SS_biexp(input, P), aux);
##       }
##     }
##     else {
##       if (SSfun <= 7) {
##         if (SSfun == 5) target += normal_lpdf(y | SS_fol(Dose, input, P), aux);
##         else if (SSfun == 6) target += normal_lpdf(y | SS_fpl(input, P), aux);
##         else target += normal_lpdf(y | SS_gompertz(input, P), aux);
##       }
##       else {
##         if (SSfun == 8) target += normal_lpdf(y | SS_logis(input, P), aux);
##         else if (SSfun == 9) target += normal_lpdf(y | SS_micmen(input, P), aux);
##         else target += normal_lpdf(y | SS_weibull(input, P), aux);
##       }
##     }
##   }
##   else if (has_weights == 0 && prior_PD == 0) { // unweighted log-likelihoods
## #include /model/make_eta_z.stan
##     // adjust eta_z according to links
##     if (has_intercept_z == 1) {
##       if (link_phi > 1) {
##         eta_z = eta_z - min(eta_z) + gamma_z[1];
##       }
##       else {
##         eta_z = eta_z + gamma_z[1];
##       }
##     }
##     else { // has_intercept_z == 0
## #include /model/eta_z_no_intercept.stan
##     }
##     if (family == 1) {
##       if (link == 1) 
##         target += normal_lpdf(y | eta, aux);
##       else if (link == 2) 
##         target += normal_lpdf(y | exp(eta), aux);
##       else 
##         target += normal_lpdf(y | inv(eta), aux);
##     }
##     else if (family == 2) {
##       target += GammaReg(y, eta, aux, link, sum_log_y);
##     }
##     else if (family == 3) {
##       target += inv_gaussian(y, linkinv_inv_gaussian(eta, link), 
##                              aux, sum_log_y, sqrt_y);
##     }
##     else if (family == 4 && link_phi == 0) {
##       vector[N] mu;
##       mu = linkinv_beta(eta, link);
##       target += beta_lpdf(y | mu * aux, (1 - mu) * aux);
##     }
##     else if (family == 4 && link_phi > 0) {
##       vector[N] mu;
##       vector[N] mu_z;
##       mu = linkinv_beta(eta, link);
##       mu_z = linkinv_beta_z(eta_z, link_phi);
##       target += beta_lpdf(y | rows_dot_product(mu, mu_z), 
##                           rows_dot_product((1 - mu) , mu_z));
##     }
##   }
##   else if (prior_PD == 0) { // weighted log-likelihoods
##     vector[N] summands;
##     if (family == 1) summands = pw_gauss(y, eta, aux, link);
##     else if (family == 2) summands = pw_gamma(y, eta, aux, link);
##     else if (family == 3) summands = pw_inv_gaussian(y, eta, aux, link, log_y, sqrt_y);
##     else if (family == 4 && link_phi == 0) summands = pw_beta(y, eta, aux, link);
##     else if (family == 4 && link_phi > 0) summands = pw_beta_z(y, eta, eta_z, link, link_phi);
##     target += dot_product(weights, summands);
##   }
## 
##   // Log-priors
##   if (prior_dist_for_aux > 0 && prior_scale_for_aux > 0) {
##     real log_half = -0.693147180559945286;
##     if (prior_dist_for_aux == 1)
##       target += normal_lpdf(aux_unscaled | 0, 1) - log_half;
##     else if (prior_dist_for_aux == 2)
##       target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half;
##     else 
##      target += exponential_lpdf(aux_unscaled | 1);
##   }
##     
## #include /model/priors_glm.stan
## #include /model/priors_betareg.stan
##   if (t > 0) decov_lp(z_b, z_T, rho, zeta, tau, 
##                       regularization, delta, shape, t, p);
## }
## generated quantities {
##   real alpha[has_intercept];
##   real omega_int[has_intercept_z];
##   real mean_PPD = 0;
##   if (has_intercept == 1) {
##     if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta);
##     else alpha[1] = gamma[1];
##   }
##   if (has_intercept_z == 1) { 
##     omega_int[1] = gamma_z[1] - dot_product(zbar, omega);  // adjust betareg intercept 
##   }
##   {
##     vector[N] eta_z;
## #include /model/make_eta.stan
##     if (t > 0) {
## #include /model/eta_add_Zb.stan
##     }
##     if (has_intercept == 1) {
##       if (make_lower(family,link) == negative_infinity() &&
##           make_upper(family,link) == positive_infinity()) eta = eta + gamma[1];
##       else if (family == 4 && link == 5) {
##         real max_eta = max(eta);
##         alpha[1] = alpha[1] - max_eta;
##         eta = eta - max_eta + gamma[1];
##       }
##       else {
##         real min_eta = min(eta);
##         alpha[1] = alpha[1] - min_eta;
##         eta = eta - min_eta + gamma[1];
##       }
##     }
##     else {
## #include /model/eta_no_intercept.stan
##     }
## #include /model/make_eta_z.stan
##     // adjust eta_z according to links
##     if (has_intercept_z == 1) {
##       if (link_phi > 1) {
##         omega_int[1] = omega_int[1] - min(eta_z);
##         eta_z = eta_z - min(eta_z) + gamma_z[1];
##       }
##       else {
##         eta_z = eta_z + gamma_z[1];
##       }
##     }
##     else { // has_intercept_z == 0
## #include /model/eta_z_no_intercept.stan
##     }
##     
##     if (SSfun > 0) { // nlmer
##       vector[len_y] eta_nlmer;
##       matrix[len_y, K] P;      
##       P = reshape_vec(eta, len_y, K);
##       if (SSfun < 5) {
##         if (SSfun <= 2) {
##           if (SSfun == 1) eta_nlmer = SS_asymp(input, P);
##           else eta_nlmer = SS_asympOff(input, P);
##         }
##         else if (SSfun == 3) eta_nlmer = SS_asympOrig(input, P);
##         else eta_nlmer = SS_biexp(input, P);
##       }
##       else {
##         if (SSfun <= 7) {
##           if (SSfun == 5) eta_nlmer = SS_fol(Dose, input, P);
##           else if (SSfun == 6) eta_nlmer = SS_fpl(input, P);
##           else eta_nlmer = SS_gompertz(input, P);
##         }
##         else {
##           if (SSfun == 8) eta_nlmer = SS_logis(input, P);
##           else if (SSfun == 9) eta_nlmer = SS_micmen(input, P);
##           else eta_nlmer = SS_weibull(input, P);
##         }
##       }
##       for (n in 1:len_y) mean_PPD = mean_PPD + normal_rng(eta_nlmer[n], aux);
##     }
##     else if (family == 1) {
##       if (link > 1) eta = linkinv_gauss(eta, link);
##       for (n in 1:len_y) mean_PPD = mean_PPD + normal_rng(eta[n], aux);
##     }
##     else if (family == 2) {
##       if (link > 1) eta = linkinv_gamma(eta, link);
##       for (n in 1:len_y) mean_PPD = mean_PPD + gamma_rng(aux, aux / eta[n]);
##     }
##     else if (family == 3) {
##       if (link > 1) eta = linkinv_inv_gaussian(eta, link);
##       for (n in 1:len_y) mean_PPD = mean_PPD + inv_gaussian_rng(eta[n], aux);
##     }
##     else if (family == 4 && link_phi == 0) { 
##       eta = linkinv_beta(eta, link);
##       for (n in 1:N) {
##         real eta_n = eta[n];
##         if (aux <= 0) mean_PPD = mean_PPD + bernoulli_rng(0.5);
##         else if (eta_n >= 1) mean_PPD = mean_PPD + 1;
##         else if (eta_n > 0)
##           mean_PPD = mean_PPD + beta_rng(eta[n] * aux, (1 - eta[n]) * aux);
##       }
##     }
##     else if (family == 4 && link_phi > 0) {
##       eta = linkinv_beta(eta, link);
##       eta_z = linkinv_beta_z(eta_z, link_phi);
##       for (n in 1:N) {
##         real eta_n = eta[n];
##         real aux_n = eta_z[n];
##         if (aux_n <= 0) mean_PPD = mean_PPD + bernoulli_rng(0.5);
##         else if (eta_n >= 1) mean_PPD = mean_PPD + 1;
##         else if (eta_n > 0)
##           mean_PPD = mean_PPD + beta_rng(eta_n * aux_n, (1 - eta_n) * aux_n);
##       }
##     }
##     mean_PPD = mean_PPD / len_y;
##   }
## }

The prior specification can be examined as follows.

rstanarm::prior_summary(fit_rstanarm)
## Priors for model 'fit_rstanarm' 
## ------
## Intercept (after predictors centered)
##  ~ normal(location = 178, scale = 20)
## 
## Auxiliary (sigma)
##  ~ exponential(rate = 25)
##      **adjusted scale = 0.31 (adjusted rate = 1/adjusted scale)
## ------
## See help('prior_summary.stanreg') for more details

Plot to obtain a posterior figure.

plot(fit_rstanarm)

## Posterior intervals and point estimates
stan_plot(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Traceplots
stan_trace(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Histograms
stan_hist(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Kernel density estimates
stan_dens(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Scatterplots
stan_scat(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Diagnostics for Hamiltonian Monte Carlo and the No-U-Turn Sampler
stan_diag(fit_rstanarm)

## Rhat
stan_rhat(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Ratio of effective sample size to total posterior sample size
stan_ess(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Ratio of Monte Carlo standard error to posterior standard deviation
stan_mcse(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Autocorrelation
stan_ac(fit_rstanarm, pars = c("(Intercept)","sigma"))

The results are summarized as follows.

summary(fit_rstanarm)
## 
## Model Info:
## 
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      height ~ 1
##  algorithm:    sampling
##  priors:       see help('prior_summary')
##  sample:       4000 (posterior sample size)
##  observations: 352
##  predictors:   1
## 
## Estimates:
##                 mean    sd      2.5%    25%     50%     75%     97.5%
## (Intercept)     154.6     0.4   153.8   154.3   154.6   154.9   155.4
## sigma             7.5     0.3     7.0     7.3     7.5     7.7     8.1
## mean_PPD        154.6     0.6   153.5   154.2   154.6   155.0   155.7
## log-posterior -1246.4     1.0 -1248.8 -1246.8 -1246.1 -1245.7 -1245.4
## 
## Diagnostics:
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  2547 
## sigma         0.0  1.0  2580 
## mean_PPD      0.0  1.0  3093 
## log-posterior 0.0  1.0  1746 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

stan gender model

\(f(y) = \int f(y|\mu_{0},\delta,\sigma) g_{1}(\mu_{{0}}) g_{2}(\delta) g_{3}(\sigma) \text{d}\mu_{0} \text{d}\delta \text{d}\sigma\)

n_prior_sample <- 10^4
mu0_mean  <- 170
mu0_sd <- 20
delta_mean  <- 10
delta_sd <- 10
sigma_exp <- 25
p_male <- 0.5
## Independence assumed. OK to sample separately.
prior_sample <- data_frame(male = rbinom(n_prior_sample, size = 1, prob = p_male),
                           mu0 = rnorm(n_prior_sample, mu0_mean, mu0_sd),
                           delta = rnorm(n_prior_sample, delta_mean, delta_sd),
                           mu_i = mu0 + delta * male,
                           sigma = rexp(n_prior_sample, sigma_exp)) %>%
    ## Sample from the prior distribution of heights.
    mutate(prior_y = rnorm(n_prior_sample, mu_i, sigma))
prior_sample
## # A tibble: 10,000 x 6
##     male   mu0   delta  mu_i    sigma prior_y
##    <int> <dbl>   <dbl> <dbl>    <dbl>   <dbl>
##  1     0  173.  -2.82   173. 0.0267      173.
##  2     0  144.  26.4    144. 0.000673    144.
##  3     1  134.   0.895  135. 0.0759      135.
##  4     1  157.   4.44   162. 0.00199     162.
##  5     1  182.   8.24   190. 0.0454      190.
##  6     0  186. -19.8    186. 0.0335      186.
##  7     0  172.   8.34   172. 0.00865     172.
##  8     0  135.  -1.89   135. 0.0501      135.
##  9     1  174.  12.6    186. 0.0206      186.
## 10     1  183.   6.78   190. 0.00459     190.
## # ... with 9,990 more rows
## Plot
ggplot(data = prior_sample, mapping = aes(x = prior_y)) +
    geom_density() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

The prior for the sex difference is likely too vague to create a bimodal prior predictive distribution of heights..

The model stated above can be coded as follows in stan.

print(system("cat ./height_sex.stan", intern = TRUE), quote = FALSE)
##  [1] data {                                                             
##  [2]     /* Hyperparameters are given as data */                        
##  [3]     real mu0_mean;                                                 
##  [4]     real mu0_sd;                                                   
##  [5]     real delta_mean;                                               
##  [6]     real delta_sd;                                                 
##  [7]     real<lower=0> sigma_exp;                                       
##  [8]                                                                    
##  [9]     /* Define variables in data */                                 
## [10]     /* Number of observations (an integer) */                      
## [11]     int<lower=1> n;                                                
## [12]     /* Male sex indicator */                                       
## [13]     int<lower=0, upper=1> male[n];                                 
## [14]     /* Outcome (a real vector of length n) */                      
## [15]     real y[n];                                                     
## [16] }                                                                  
## [17]                                                                    
## [18]                                                                    
## [19] parameters {                                                       
## [20]     /* Define parameters to estimate */                            
## [21]     /* Female population mean (a real number) */                   
## [22]     real mu0;                                                      
## [23]     /* Sex difference */                                           
## [24]     real delta;                                                    
## [25]     /* Population SD (a positive real number) */                   
## [26]     real<lower=0> sigma;                                           
## [27] }                                                                  
## [28]                                                                    
## [29]                                                                    
## [30] model {                                                            
## [31]     /* Prior part of Bayesian inference */                         
## [32]     /* Flat prior for mu (no need to specify if non-informative) */
## [33]     mu0 ~ normal(mu0_mean, mu0_sd);                                
## [34]     delta ~ normal(delta_mean, delta_sd);                          
## [35]     sigma ~ exponential(sigma_exp);                                
## [36]                                                                    
## [37]                                                                    
## [38]     /* Likelihood part of Bayesian inference */                    
## [39]     for (i in 1:n) {                                               
## [40]         y[i] ~ normal(mu0 + delta*male[i], sigma);                 
## [41]     }                                                              
## [42] }

Fitting can be conducted using the stan function in the rstan package.

fit_sex <- rstan::stan(file = "./height_sex.stan",
                       data = list(n = nrow(Howell1),
                                   male = Howell1$male,
                                   y = Howell1$height,
                                   ## Hyperparameter values
                                   mu0_mean = 170,
                                   mu0_sd = 20,
                                   delta_mean = 10,
                                   delta_sd = 10,
                                   sigma_exp = 25),
                       iter = 1000, chains = 4)
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 1).
## 
## Gradient evaluation took 0.00014 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 1.4 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.961099 seconds (Warm-up)
##                0.484578 seconds (Sampling)
##                1.44568 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 2).
## 
## Gradient evaluation took 0.000207 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 2.07 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.994068 seconds (Warm-up)
##                0.599564 seconds (Sampling)
##                1.59363 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 3).
## 
## Gradient evaluation took 0.000178 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 1.78 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.978648 seconds (Warm-up)
##                0.537336 seconds (Sampling)
##                1.51598 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 4).
## 
## Gradient evaluation took 0.000175 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 1.75 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.833791 seconds (Warm-up)
##                0.43968 seconds (Sampling)
##                1.27347 seconds (Total)

The shinystan package provides interactive diagnostics (not run here).

shinystan::launch_shinystan(fit_sex)

Show results. The effective sample size increased a lot after changing the prior from Uniform(0,50) to Exponential(25).

fit_sex
## Inference for Stan model: height_sex.
## 4 chains, each with iter=1000; warmup=500; thin=1; 
## post-warmup draws per chain=500, total post-warmup draws=2000.
## 
##          mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu0    149.52    0.01 0.35  148.83  149.29  149.52  149.74  150.21  1182    1
## delta   10.85    0.02 0.51    9.82   10.52   10.85   11.20   11.86  1078    1
## sigma    4.79    0.00 0.15    4.51    4.68    4.79    4.89    5.10  1165    1
## lp__  -905.51    0.05 1.27 -908.77 -906.09 -905.17 -904.56 -904.08   764    1
## 
## Samples were drawn using NUTS(diag_e) at Sat Oct  6 05:24:52 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Show the summary results.

summary(fit_sex)
## $summary
##              mean     se_mean        sd        2.5%         25%         50%        75%       97.5%     n_eff
## mu0    149.519540 0.010322366 0.3549153  148.826641  149.290095  149.521218  149.74463  150.214240 1182.1997
## delta   10.854938 0.015643569 0.5135080    9.818063   10.522748   10.846471   11.19851   11.863126 1077.5135
## sigma    4.789515 0.004439313 0.1515337    4.507359    4.679808    4.788478    4.88773    5.100379 1165.1636
## lp__  -905.505759 0.045886572 1.2680662 -908.772781 -906.090092 -905.169241 -904.56057 -904.081981  763.6822
##           Rhat
## mu0   1.003617
## delta 1.002176
## sigma 1.001174
## lp__  1.000692
## 
## $c_summary
## , , chains = chain:1
## 
##          stats
## parameter        mean        sd        2.5%         25%         50%         75%      97.5%
##     mu0    149.493763 0.3629291  148.798109  149.262765  149.514870  149.730336  150.18389
##     delta   10.875604 0.5099152    9.946888   10.520426   10.870530   11.209234   11.90917
##     sigma    4.779234 0.1401082    4.499962    4.678117    4.792342    4.866212    5.08628
##     lp__  -905.455840 1.2637180 -908.597826 -906.030081 -905.152537 -904.512547 -904.08615
## 
## , , chains = chain:2
## 
##          stats
## parameter        mean        sd        2.5%        25%         50%         75%       97.5%
##     mu0    149.512593 0.3490029  148.810404  149.28173  149.502894  149.751901  150.205716
##     delta   10.876287 0.5053849    9.884155   10.58052   10.854030   11.202886   11.923446
##     sigma    4.788163 0.1493261    4.511496    4.68579    4.787635    4.882518    5.089354
##     lp__  -905.490276 1.3280657 -908.909259 -905.99451 -905.119389 -904.519608 -904.062561
## 
## , , chains = chain:3
## 
##          stats
## parameter        mean        sd        2.5%         25%         50%         75%       97.5%
##     mu0    149.526310 0.3549025  148.842844  149.287584  149.527200  149.729970  150.306370
##     delta   10.836653 0.5029623    9.704061   10.520100   10.834727   11.165013   11.745060
##     sigma    4.783739 0.1616324    4.495947    4.667861    4.776697    4.899764    5.097192
##     lp__  -905.535855 1.2235567 -908.474618 -906.072624 -905.223587 -904.631003 -904.097277
## 
## , , chains = chain:4
## 
##          stats
## parameter        mean        sd        2.5%         25%         50%        75%       97.5%
##     mu0    149.545494 0.3517175  148.824104  149.334051  149.543955  149.76536  150.218700
##     delta   10.831207 0.5349435    9.798185   10.501684   10.824102   11.20996   11.852644
##     sigma    4.806924 0.1532626    4.522657    4.695614    4.793949    4.91300    5.118884
##     lp__  -905.541064 1.2565465 -908.744018 -906.187152 -905.223437 -904.56647 -904.084770

Show traceplots.

rstan::traceplot(fit_sex)

Get posterior values.

df_fit_sex <- tidybayes::tidy_draws(fit_sex)
df_fit_sex
## # A tibble: 2,000 x 7
##    .chain .iteration .draw   mu0 delta sigma  lp__
##     <int>      <int> <int> <dbl> <dbl> <dbl> <dbl>
##  1      1          1     1  150. 10.6   4.87 -904.
##  2      1          2     2  150. 10.8   4.93 -906.
##  3      1          3     3  150.  9.92  4.75 -906.
##  4      1          4     4  149. 11.2   4.79 -904.
##  5      1          5     5  149. 10.8   4.92 -905.
##  6      1          6     6  150. 10.7   4.94 -905.
##  7      1          7     7  149. 11.6   4.77 -907.
##  8      1          8     8  149. 11.5   4.75 -907.
##  9      1          9     9  150. 10.4   4.65 -906.
## 10      1         10    10  150. 10.3   4.69 -905.
## # ... with 1,990 more rows

Show a 2D traceplot.

ggplot(data = df_fit_sex, mapping = aes(x = mu0, y = sigma, group = .chain, color = factor(.chain))) +
    geom_path(size = 0.1) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

Posterior distribution of heights. Our unimodal model seems somewhat different from the observed data.

df_fit_sex %>%
    mutate(male = rbinom(length(mu0), 1, mean(Howell1$male)),
           height = rnorm(length(mu0), mu0 + delta*male, sigma)) %>%
    ggplot(mapping = aes(x = height)) +
    geom_density() +
    geom_density(data = Howell1, color = "red") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

This captures the bimodal nature of the observed data much better than the previous overall mean model.

Posterior probability of \(\mu0 > 155\)

df_fit_sex %>%
    summarize(`P[mu0 > 155]` = mean(mu0 > 155),
              `P[mu0+delta > 155]` = mean(mu0+delta > 155))
## # A tibble: 1 x 2
##   `P[mu0 > 155]` `P[mu0+delta > 155]`
##            <dbl>                <dbl>
## 1              0                    1
## Posterior intervals and point estimates
stan_plot(fit_sex, pars = c("mu0","delta","sigma"))

## Traceplots
stan_trace(fit_sex, pars = c("mu0","delta","sigma"))

## Histograms
stan_hist(fit_sex, pars = c("mu0","delta","sigma"))

## Kernel density estimates
stan_dens(fit_sex, pars = c("mu0","delta","sigma"))

## Scatterplots
stan_scat(fit_sex, pars = c("mu0","delta","sigma"))
## Error: 'pars' must contain exactly two parameter names
## Diagnostics for Hamiltonian Monte Carlo and the No-U-Turn Sampler
stan_diag(fit_sex)

## Rhat
stan_rhat(fit_sex)

## Ratio of effective sample size to total posterior sample size
stan_ess(fit_sex)

## Ratio of Monte Carlo standard error to posterior standard deviation
stan_mcse(fit_sex)

## Autocorrelation
stan_ac(fit_sex)

rstanarm sex model

fit_rstanarm_sex <- rstanarm::stan_glm(formula = height ~ male,
                                       family = gaussian(),
                                       data = Howell1,
                                       prior_intercept = normal(170, 20, autoscale = FALSE),
                                       prior = normal(10, 10, autoscale = FALSE),
                                       prior_aux = exponential(25))

Show prior specifications.

rstanarm::prior_summary(fit_rstanarm_sex)
## Priors for model 'fit_rstanarm_sex' 
## ------
## Intercept (after predictors centered)
##  ~ normal(location = 170, scale = 20)
## 
## Coefficients
##  ~ normal(location = 10, scale = 10)
## 
## Auxiliary (sigma)
##  ~ exponential(rate = 25)
##      **adjusted scale = 0.31 (adjusted rate = 1/adjusted scale)
## ------
## See help('prior_summary.stanreg') for more details

Show summary.

summary(fit_rstanarm_sex)
## 
## Model Info:
## 
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      height ~ male
##  algorithm:    sampling
##  priors:       see help('prior_summary')
##  sample:       4000 (posterior sample size)
##  observations: 352
##  predictors:   2
## 
## Estimates:
##                 mean    sd      2.5%    25%     50%     75%     97.5%
## (Intercept)     149.5     0.4   148.7   149.3   149.5   149.8   150.3
## male             10.8     0.6     9.6    10.4    10.8    11.2    12.0
## sigma             5.4     0.2     5.0     5.3     5.4     5.6     5.8
## mean_PPD        154.6     0.4   153.8   154.3   154.6   154.9   155.4
## log-posterior -1122.3     1.3 -1125.6 -1122.9 -1122.0 -1121.4 -1120.9
## 
## Diagnostics:
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  4000 
## male          0.0  1.0  4000 
## sigma         0.0  1.0  4000 
## mean_PPD      0.0  1.0  4000 
## log-posterior 0.0  1.0  1592 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Plot posteriors

plot(fit_rstanarm_sex)

The trace plots show good mixing.

stan_trace(fit_rstanarm_sex)

Diagnostic plots.

stan_diag(fit_rstanarm_sex)
## Warning: Removed 1 rows containing missing values (geom_bar).

rstanarm age sex model

fit_rstanarm_age_sex <- rstanarm::stan_glm(formula = height ~ age*male,
                                              family = gaussian(),
                                              data = Howell1,
                                              prior_intercept = normal(178, 20, autoscale = FALSE),
                                              prior = normal(10, 10, autoscale = FALSE),
                                              prior_aux = exponential(25))

Show prior specifications.

rstanarm::prior_summary(fit_rstanarm_age_sex)
## Priors for model 'fit_rstanarm_age_sex' 
## ------
## Intercept (after predictors centered)
##  ~ normal(location = 178, scale = 20)
## 
## Coefficients
##  ~ normal(location = [10,10,10], scale = [10,10,10])
## 
## Auxiliary (sigma)
##  ~ exponential(rate = 25)
##      **adjusted scale = 0.31 (adjusted rate = 1/adjusted scale)
## ------
## See help('prior_summary.stanreg') for more details

Show summary.

summary(fit_rstanarm_age_sex)
## 
## Model Info:
## 
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      height ~ age * male
##  algorithm:    sampling
##  priors:       see help('prior_summary')
##  sample:       4000 (posterior sample size)
##  observations: 352
##  predictors:   4
## 
## Estimates:
##                 mean    sd      2.5%    25%     50%     75%     97.5%
## (Intercept)     151.9     1.1   149.8   151.2   151.9   152.6   154.1
## age              -0.1     0.0    -0.1    -0.1    -0.1     0.0     0.0
## male             10.9     1.6     7.8     9.9    10.9    12.0    14.0
## age:male          0.0     0.0    -0.1     0.0     0.0     0.0     0.1
## sigma             5.4     0.2     5.0     5.2     5.4     5.5     5.7
## mean_PPD        154.6     0.4   153.8   154.3   154.6   154.9   155.4
## log-posterior -1121.1     1.6 -1125.1 -1122.0 -1120.8 -1119.9 -1119.0
## 
## Diagnostics:
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  2782 
## age           0.0  1.0  2906 
## male          0.0  1.0  2001 
## age:male      0.0  1.0  2027 
## sigma         0.0  1.0  2555 
## mean_PPD      0.0  1.0  3329 
## log-posterior 0.0  1.0  1633 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Plot posteriors

plot(fit_rstanarm_age_sex)

The trace plots show good mixing.

stan_trace(fit_rstanarm_age_sex)

Diagnostic plots.

stan_diag(fit_rstanarm_age_sex)

rstanarm model fit assessment

## Intercept only
loo_fit_rstanarm <- loo(fit_rstanarm, cores = n_cores)
waic_fit_rstanarm <- waic(fit_rstanarm)
kfold_fit_rstanarm <- kfold(fit_rstanarm, K = 10)
## Sex
loo_fit_rstanarm_sex <- loo(fit_rstanarm_sex, cores = n_cores)
waic_fit_rstanarm_sex <- waic(fit_rstanarm_sex)
kfold_fit_rstanarm_sex <- kfold(fit_rstanarm_sex, K = 10)
## Age*Sex
loo_fit_rstanarm_age_sex <- loo(fit_rstanarm_age_sex, cores = n_cores)
waic_fit_rstanarm_age_sex <- waic(fit_rstanarm_age_sex)
## Warning: 1 (0.3%) p_waic estimates greater than 0.4. We recommend trying loo instead.
kfold_fit_rstanarm_age_sex <- kfold(fit_rstanarm_age_sex, K = 10)

Comparison can be performed with compare_models.

loo.stanreg              package:rstanarm              R Documentation

Information criteria and cross-validation

Description:

     For models fit using MCMC, compute approximate leave-one-out
     cross-validation (LOO, LOOIC) or, less preferably, the Widely
     Applicable Information Criterion (WAIC) using the ‘loo’ package.
     Functions for K-fold cross-validation, model comparison, and model
     weighting/averaging are also provided. *Note*: these functions are
     not guaranteed to work properly unless the ‘data’ argument was
     specified when the model was fit. Also, as of ‘loo’ version
     ‘2.0.0’ the default number of cores is now only 1, but we
     recommend using as many (or close to as many) cores as possible by
     setting the ‘cores’ argument or using ‘options(mc.cores = VALUE)’
     to set it for an entire session.
compare_models(loo_fit_rstanarm, loo_fit_rstanarm_sex, loo_fit_rstanarm_age_sex)
## 
## Model comparison: 
## (ordered by highest ELPD)
## 
##                      elpd_diff elpd_loo se_elpd_loo p_loo   se_p_loo looic   se_looic
## fit_rstanarm_age_sex     0.0   -1101.3     16.5         5.9     0.8   2202.6    33.0 
## fit_rstanarm_sex        -3.0   -1104.3     16.1         3.5     0.5   2208.7    32.3 
## fit_rstanarm          -120.3   -1221.6     12.3         1.8     0.2   2443.2    24.7
compare_models(waic_fit_rstanarm, waic_fit_rstanarm_sex, waic_fit_rstanarm_age_sex)
## 
## Model comparison: 
## (ordered by highest ELPD)
## 
##                      elpd_diff elpd_waic se_elpd_waic p_waic  se_p_waic waic    se_waic
## fit_rstanarm_age_sex     0.0   -1101.3      16.5          5.9     0.8    2202.6    33.0
## fit_rstanarm_sex        -3.0   -1104.3      16.1          3.5     0.5    2208.7    32.3
## fit_rstanarm          -120.3   -1221.6      12.3          1.8     0.2    2443.2    24.7
compare_models(kfold_fit_rstanarm, kfold_fit_rstanarm_sex, kfold_fit_rstanarm_age_sex)
## 
## Model comparison: 
## (ordered by highest ELPD)
## 
##                      elpd_diff elpd_kfold se_elpd_kfold
## fit_rstanarm_sex         0.0   -1103.7       15.9      
## fit_rstanarm_age_sex    -0.7   -1104.4       17.1      
## fit_rstanarm          -117.5   -1221.1       12.4
## Model averaging/weighting via stacking or pseudo-BMA weighting
loo_model_weights(stanreg_list(fit_rstanarm, fit_rstanarm_sex, fit_rstanarm_age_sex))
## Method: stacking
## ------
##                      weight
## fit_rstanarm         0.042 
## fit_rstanarm_sex     0.055 
## fit_rstanarm_age_sex 0.902