Bayesian Thinking Lecture by Jim Albert: Gaussian Model of Height

## ### Using 12 cores

## ### Using doParallelMC as backend

About

This document is based on the lecture “Bayesian Thinking: Fundamentals, Computation, and Multilevel Modeling” by Dr. Jim Albert (http://www-math.bgsu.edu/~albert/) held at Harvard T.H. Chan School of Public Health on Oct 1-2, 2018.

References

Bayesian Thinking: Fundamentals, Computation, and Multilevel Modeling by Jim Albert at HSPH on Oct 1-2, 2018.
https://github.com/bayesball/BayesComputeR
https://github.com/rmcelreath/rethinking
https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
https://github.com/mjskay/tidybayes

Load packages

library(tidyverse)
## devtools::install_github("rmcelreath/rethinking")
library(rethinking)
library(shinystan)
library(tidybayes)

set.seed(41245674)

Prior specification

Gaussian model for height: \(y_{i} \sim N(\mu, \sigma^{2})\)
Prior for mean parameter: \(\mu \sim N(178, 20)\)
Prior for SD parameter: \(\sigma \sim Uniform(0,50)\)
Prior predictive density of the outcome is the prior belief expressed in terms of the outcome variable distribution.

\(f(y) = \int f(y|\mu,\sigma) g_{1}(\mu) g_{2}(\sigma) \text{d}\mu \text{d}\sigma\)

n_prior_sample <- 10^4
mu_mean  <- 178
mu_sd <- 20
sigma_min <- 0
sigma_max <- 50
## Independence assumed. OK to sample separately.
prior_sample <- data_frame(mu = rnorm(n_prior_sample, mu_mean, mu_sd),
                           sigma = runif(n_prior_sample, sigma_min, sigma_max)) %>%
    ## Sample from the prior distribution of heights.
    mutate(prior_y = rnorm(n_prior_sample, mu, sigma))
prior_sample

## # A tibble: 10,000 x 3
##       mu  sigma prior_y
##    <dbl>  <dbl>   <dbl>
##  1  220. 48.3      289.
##  2  175. 32.3      136.
##  3  147.  7.21     151.
##  4  177. 13.4      192.
##  5  192. 21.0      175.
##  6  196. 15.5      191.
##  7  213. 22.2      180.
##  8  153.  1.16     154.
##  9  179.  3.48     179.
## 10  185.  0.819    185.
## # ... with 9,990 more rows

## Plot
ggplot(data = prior_sample, mapping = aes(x = prior_y)) +
    geom_density() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

One may argue there is too much probability assigned to < 100cm and > 250cm, meaning the stated prior beliefs on the parameters may be somewhat unrealistic. Here we will go ahead and see how the prior beliefs become modified by data.

In the following a different prior was used due to convergence issues.

Prior for SD parameter: \(\sigma \sim Exponential(25)\)

n_prior_sample <- 10^4
mu_mean  <- 178
mu_sd <- 20
sigma_exp <- 25
## Independence assumed. OK to sample separately.
prior_sample <- data_frame(mu = rnorm(n_prior_sample, mu_mean, mu_sd),
                           sigma = rexp(n_prior_sample, sigma_exp)) %>%
    ## Sample from the prior distribution of heights.
    mutate(prior_y = rnorm(n_prior_sample, mu, sigma))
prior_sample

## # A tibble: 10,000 x 3
##       mu    sigma prior_y
##    <dbl>    <dbl>   <dbl>
##  1  158. 0.125       158.
##  2  181. 0.0302      181.
##  3  162. 0.0157      162.
##  4  177. 0.00496     177.
##  5  178. 0.0283      178.
##  6  196. 0.00489     196.
##  7  162. 0.000210    162.
##  8  178. 0.230       178.
##  9  208. 0.0712      208.
## 10  182. 0.112       182.
## # ... with 9,990 more rows

## Plot
ggplot(data = prior_sample, mapping = aes(x = prior_y)) +
    geom_density() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

This looks like a more realistic prior belief about the distribution of heights.

Load data

We will use height data from 352 adult individuals. No covariates are considered.

data(Howell1, package = "rethinking")
Howell1 <- Howell1 %>%
    as_data_frame() %>%
    filter(age >= 18)
Howell1

## # A tibble: 352 x 4
##    height weight   age  male
##     <dbl>  <dbl> <dbl> <int>
##  1   152.   47.8    63     1
##  2   140.   36.5    63     0
##  3   137.   31.9    65     0
##  4   157.   53.0    41     1
##  5   145.   41.3    51     0
##  6   164.   63.0    35     1
##  7   149.   38.2    32     0
##  8   169.   55.5    27     1
##  9   148.   34.9    19     0
## 10   165.   54.5    54     1
## # ... with 342 more rows

Stan model

The model stated above can be coded as follows in stan.

print(system("cat ./height.stan", intern = TRUE), quote = FALSE)

##  [1] data {                                                             
##  [2]     /* Hyperparameters are given as data */                        
##  [3]     real mu_mean;                                                  
##  [4]     real mu_sd;                                                    
##  [5]     real<lower=0> sigma_exp;                                       
##  [6]                                                                    
##  [7]     /* Define variables in data */                                 
##  [8]     /* Number of observations (an integer) */                      
##  [9]     int<lower=0> n;                                                
## [10]     /* Outcome (a real vector of length n) */                      
## [11]     real y[n];                                                     
## [12] }                                                                  
## [13]                                                                    
## [14]                                                                    
## [15] parameters {                                                       
## [16]     /* Define parameters to estimate */                            
## [17]     /* Population mean (a real number) */                          
## [18]     real mu;                                                       
## [19]     /* Population SD (a positive real number) */                   
## [20]     real<lower=0> sigma;                                           
## [21] }                                                                  
## [22]                                                                    
## [23]                                                                    
## [24] model {                                                            
## [25]     /* Prior part of Bayesian inference */                         
## [26]     /* Flat prior for mu (no need to specify if non-informative) */
## [27]     mu ~ normal(mu_mean, mu_sd);                                   
## [28]     sigma ~ exponential(sigma_exp);                                
## [29]                                                                    
## [30]                                                                    
## [31]     /* Likelihood part of Bayesian inference */                    
## [32]     /* Outcome model N(mu, sigma) */                               
## [33]     y ~ normal(mu, sigma);                                         
## [34] }

Fitting can be conducted using the stan function in the rstan package.

fit <- rstan::stan(file = "./height.stan",
                   data = list(n = nrow(Howell1),
                               y = Howell1$height,
                               ## Hyperparameter values
                               mu_mean = 178,
                               mu_sd = 20,
                               sigma_exp = 25),
            iter = 1000, chains = 4)

## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 1).
## 
## Gradient evaluation took 4.7e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.47 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.07397 seconds (Warm-up)
##                0.056313 seconds (Sampling)
##                0.130283 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 2).
## 
## Gradient evaluation took 2.9e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.109342 seconds (Warm-up)
##                0.107164 seconds (Sampling)
##                0.216506 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 3).
## 
## Gradient evaluation took 2.4e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.067465 seconds (Warm-up)
##                0.060244 seconds (Sampling)
##                0.127709 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height' NOW (CHAIN 4).
## 
## Gradient evaluation took 2.4e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.090759 seconds (Warm-up)
##                0.082279 seconds (Sampling)
##                0.173038 seconds (Total)

The shinystan package provides interactive diagnostics (not run here).

shinystan::launch_shinystan(fit)

Show results. The effective sample size increased a lot after changing the prior from Uniform(0,50) to Exponential(25).

fit

## Inference for Stan model: height.
## 4 chains, each with iter=1000; warmup=500; thin=1; 
## post-warmup draws per chain=500, total post-warmup draws=2000.
## 
##           mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
## mu      154.62    0.01 0.35   153.93   154.37   154.62   154.84   155.32  1312 1.00
## sigma     6.43    0.00 0.18     6.08     6.30     6.42     6.55     6.80  1432 1.00
## lp__  -1070.10    0.03 1.04 -1072.87 -1070.49 -1069.78 -1069.35 -1069.10   884 1.01
## 
## Samples were drawn using NUTS(diag_e) at Sat Oct  6 05:23:32 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Show the summary results.

summary(fit)

## $summary
##               mean     se_mean        sd         2.5%          25%          50%          75%        97.5%     n_eff
## mu      154.616041 0.009766458 0.3536945   153.933597   154.368782   154.615028   154.838072   155.315303 1311.5428
## sigma     6.428279 0.004842861 0.1832864     6.083154     6.304286     6.422239     6.548656     6.797379 1432.3744
## lp__  -1070.095105 0.034902857 1.0375120 -1072.867126 -1070.490223 -1069.779194 -1069.350191 -1069.101708  883.6174
##            Rhat
## mu    1.0022927
## sigma 0.9998109
## lp__  1.0059778
## 
## $c_summary
## , , chains = chain:1
## 
##          stats
## parameter         mean        sd         2.5%         25%          50%          75%        97.5%
##     mu      154.617513 0.3631546   153.919473   154.36201   154.609607   154.852119   155.319546
##     sigma     6.436915 0.1870575     6.091419     6.30579     6.430188     6.555873     6.799664
##     lp__  -1070.140110 1.0256265 -1073.018097 -1070.52773 -1069.831417 -1069.407504 -1069.100218
## 
## , , chains = chain:2
## 
##          stats
## parameter         mean        sd         2.5%          25%          50%          75%        97.5%
##     mu      154.614191 0.3463174   153.978934   154.357600   154.635913   154.840282   155.305731
##     sigma     6.428466 0.1822258     6.112142     6.293464     6.420475     6.550305     6.815281
##     lp__  -1070.065843 0.9694640 -1072.662628 -1070.434190 -1069.813109 -1069.353337 -1069.115944
## 
## , , chains = chain:3
## 
##          stats
## parameter         mean        sd         2.5%          25%         50%          75%        97.5%
##     mu      154.612200 0.3330706   153.948240   154.388763   154.60740   154.817022   155.282215
##     sigma     6.422648 0.1795321     6.072934     6.311886     6.40535     6.544497     6.795871
##     lp__  -1070.015628 0.9989886 -1072.613160 -1070.399933 -1069.66202 -1069.304923 -1069.100123
## 
## , , chains = chain:4
## 
##          stats
## parameter         mean        sd         2.5%          25%          50%          75%        97.5%
##     mu      154.620258 0.3719678   153.896870   154.372740   154.618883   154.848457   155.361143
##     sigma     6.425089 0.1844787     6.082604     6.299737     6.424582     6.543414     6.777316
##     lp__  -1070.158840 1.1441854 -1073.229305 -1070.606984 -1069.826401 -1069.364397 -1069.101971

Show traceplots.

rstan::traceplot(fit)

Get posterior values.

df_fit <- tidybayes::tidy_draws(fit)
df_fit

## # A tibble: 2,000 x 6
##    .chain .iteration .draw    mu sigma   lp__
##     <int>      <int> <int> <dbl> <dbl>  <dbl>
##  1      1          1     1  155.  6.67 -1070.
##  2      1          2     2  155.  6.07 -1071.
##  3      1          3     3  155.  6.77 -1071.
##  4      1          4     4  155.  6.70 -1070.
##  5      1          5     5  155.  6.64 -1070.
##  6      1          6     6  154.  6.61 -1070.
##  7      1          7     7  154.  6.23 -1070.
##  8      1          8     8  154.  6.24 -1070.
##  9      1          9     9  155.  6.43 -1069.
## 10      1         10    10  154.  6.43 -1069.
## # ... with 1,990 more rows

Show a 2D traceplot.

ggplot(data = df_fit, mapping = aes(x = mu, y = sigma, group = .chain, color = factor(.chain))) +
    geom_path(size = 0.1) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

Posterior distribution of heights. Our unimodal model seems somewhat different from the observed data.

df_fit %>%
    mutate(height = rnorm(length(mu), mu, sigma)) %>%
    ggplot(mapping = aes(x = height)) +
    geom_density() +
    geom_density(data = Howell1, color = "red") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

By the design of the model (overall mean only), the posterior predictive distribution is unimodal and the fit appears poor.

Posterior probability of \(\mu > 155\)

df_fit %>%
    summarize(prob_gr_155 = mean(mu > 155))

## # A tibble: 1 x 1
##   prob_gr_155
##         <dbl>
## 1       0.138

Various plots can be used for diagnostics.
https://www.rdocumentation.org/packages/rstanarm/versions/2.10.1/topics/rstanarm-plots

## Posterior intervals and point estimates
stan_plot(fit, pars = c("mu","sigma"))

## Traceplots
stan_trace(fit, pars = c("mu","sigma"))

## Histograms
stan_hist(fit, pars = c("mu","sigma"))

## Kernel density estimates
stan_dens(fit, pars = c("mu","sigma"))

## Scatterplots
stan_scat(fit, pars = c("mu","sigma"))

## Diagnostics for Hamiltonian Monte Carlo and the No-U-Turn Sampler
stan_diag(fit)

## Rhat
stan_rhat(fit)

## Ratio of effective sample size to total posterior sample size
stan_ess(fit)

## Ratio of Monte Carlo standard error to posterior standard deviation
stan_mcse(fit)

## Autocorrelation
stan_ac(fit)

rstanarm

A wrapper package rstanarm can simplify coding.

library(rstanarm)
fit_rstanarm <- rstanarm::stan_glm(formula = height ~ 1,
                                   family = gaussian(),
                                   data = Howell1,
                                   prior_intercept = normal(178, 20, autoscale = FALSE),
                                   ## Uniform is not supported
                                   prior_aux = exponential(25))

The underlying stan code can be extracted, but it is very complicated.

cat(rstan::get_stancode(fit_rstanarm$stanfit))

## #include /pre/Columbia_copyright.stan
## #include /pre/license.stan
## 
## // GLM for a Gaussian, Gamma, inverse Gaussian, or Beta outcome
## functions {
## #include /functions/common_functions.stan
## #include /functions/continuous_likelihoods.stan
## #include /functions/SSfunctions.stan
## 
##   /** 
##   * test function for csr_matrix_times_vector
##   *
##   * @param m Integer number of rows
##   * @param n Integer number of columns
##   * @param w Vector (see reference manual)
##   * @param v Integer array (see reference manual)
##   * @param u Integer array (see reference manual)
##   * @param b Vector that is multiplied from the left by the CSR matrix
##   * @return A vector that is the product of the CSR matrix and b
##   */
##   vector test_csr_matrix_times_vector(int m, int n, vector w, 
##                                       int[] v, int[] u, vector b) {
##     return csr_matrix_times_vector(m, n, w, v, u, b); 
##   }
## }
## data {
##   // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x
## #include /data/NKX.stan
##   int<lower=0> len_y;      // length of y
##   real lb_y; // lower bound on y
##   real<lower=lb_y> ub_y; // upper bound on y
##   vector<lower=lb_y, upper=ub_y>[len_y] y; // continuous outcome
##   int<lower=1,upper=4> family; // 1 gaussian, 2 gamma, 3 inv-gaussian, 4 beta
##   // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept
## #include /data/data_glm.stan
##   // declares has_weights, weights, has_offset, offset
## #include /data/weights_offset.stan
##   // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux
## #include /data/hyperparameters.stan
##   // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization
## #include /data/glmer_stuff.stan
##   // declares num_not_zero, w, v, u
## #include /data/glmer_stuff2.stan
## #include /data/data_betareg.stan
## 
##   int<lower=0,upper=10> SSfun; // nonlinear function indicator, 0 for identity
##   vector[SSfun > 0  ? len_y : 0] input;
##   vector[SSfun == 5 ? len_y : 0] Dose;
## }
## transformed data {
##   vector[family == 3 ? len_y : 0] sqrt_y;
##   vector[family == 3 ? len_y : 0] log_y;
##   real sum_log_y = family == 1 ? not_a_number() : sum(log(y));
##   int<lower=1> V[special_case ? t : 0, len_y] = make_V(len_y, special_case ? t : 0, v);
##   int<lower=0> hs_z;                  // for tdata_betareg.stan
##   // defines hs, len_z_T, len_var_group, delta, is_continuous, pos
## #include /tdata/tdata_glm.stan
##   // defines hs_z
## #include /tdata/tdata_betareg.stan
##   is_continuous = 1;
## 
##   if (family == 3) {
##     sqrt_y = sqrt(y);
##     log_y = log(y);
##   }
## }
## parameters {
##   real<lower=make_lower(family, link),upper=make_upper(family,link)> gamma[has_intercept];
##   // declares z_beta, global, local, z_b, z_T, rho, zeta, tau
## #include /parameters/parameters_glm.stan
##   real<lower=0> aux_unscaled; // interpretation depends on family!
## #include /parameters/parameters_betareg.stan
## }
## transformed parameters {
##   // aux has to be defined first in the hs case
##   real aux = prior_dist_for_aux == 0 ? aux_unscaled : (prior_dist_for_aux <= 2 ? 
##              prior_scale_for_aux * aux_unscaled + prior_mean_for_aux :
##              prior_scale_for_aux * aux_unscaled);
## 
##   vector[z_dim] omega; // used in tparameters_betareg.stan
##   // defines beta, b, theta_L
## #include /tparameters/tparameters_glm.stan
## #include /tparameters/tparameters_betareg.stan
##   
##   if (prior_dist_for_aux == 0) // none
##     aux = aux_unscaled;
##   else {
##     aux = prior_scale_for_aux * aux_unscaled;
##     if (prior_dist_for_aux <= 2) // normal or student_t
##       aux = aux + prior_mean_for_aux;
##   }
## 
##   if (t > 0) {
##     if (special_case == 1) {
##       int start = 1;
##       theta_L = scale .* tau * aux;
##       if (t == 1) b = theta_L[1] * z_b;
##       else for (i in 1:t) {
##         int end = start + l[i] - 1;
##         b[start:end] = theta_L[i] * z_b[start:end];
##         start = end + 1;
##       }
##     }
##     else {
##       theta_L = make_theta_L(len_theta_L, p, 
##                              aux, tau, scale, zeta, rho, z_T);
##       b = make_b(z_b, theta_L, p, l);
##     }
##   }
## }
## model {
##   vector[N] eta_z; // beta regression linear predictor for phi
## #include /model/make_eta.stan
##   if (t > 0) {
## #include /model/eta_add_Zb.stan
##   }
##   if (has_intercept == 1) {
##     if ((family == 1 || link == 2) || (family == 4 && link != 5)) eta = eta + gamma[1];
##     else if (family == 4 && link == 5) eta = eta - max(eta) + gamma[1];
##     else eta = eta - min(eta) + gamma[1];
##   }
##   else {
## #include /model/eta_no_intercept.stan
##   }
## 
##   if (SSfun > 0) { // nlmer
##     matrix[len_y, K] P;
##     P = reshape_vec(eta, len_y, K);
##     if (SSfun < 5) {
##       if (SSfun <= 2) {
##         if (SSfun == 1) target += normal_lpdf(y | SS_asymp(input, P), aux);
##         else target += normal_lpdf(y | SS_asympOff(input, P), aux);
##       }
##       else if (SSfun == 3) target += normal_lpdf(y | SS_asympOrig(input, P), aux);
##       else {
##         for (i in 1:len_y) P[i,1] = P[i,1] + exp(P[i,3]); // ordering constraint
##         target += normal_lpdf(y | SS_biexp(input, P), aux);
##       }
##     }
##     else {
##       if (SSfun <= 7) {
##         if (SSfun == 5) target += normal_lpdf(y | SS_fol(Dose, input, P), aux);
##         else if (SSfun == 6) target += normal_lpdf(y | SS_fpl(input, P), aux);
##         else target += normal_lpdf(y | SS_gompertz(input, P), aux);
##       }
##       else {
##         if (SSfun == 8) target += normal_lpdf(y | SS_logis(input, P), aux);
##         else if (SSfun == 9) target += normal_lpdf(y | SS_micmen(input, P), aux);
##         else target += normal_lpdf(y | SS_weibull(input, P), aux);
##       }
##     }
##   }
##   else if (has_weights == 0 && prior_PD == 0) { // unweighted log-likelihoods
## #include /model/make_eta_z.stan
##     // adjust eta_z according to links
##     if (has_intercept_z == 1) {
##       if (link_phi > 1) {
##         eta_z = eta_z - min(eta_z) + gamma_z[1];
##       }
##       else {
##         eta_z = eta_z + gamma_z[1];
##       }
##     }
##     else { // has_intercept_z == 0
## #include /model/eta_z_no_intercept.stan
##     }
##     if (family == 1) {
##       if (link == 1) 
##         target += normal_lpdf(y | eta, aux);
##       else if (link == 2) 
##         target += normal_lpdf(y | exp(eta), aux);
##       else 
##         target += normal_lpdf(y | inv(eta), aux);
##     }
##     else if (family == 2) {
##       target += GammaReg(y, eta, aux, link, sum_log_y);
##     }
##     else if (family == 3) {
##       target += inv_gaussian(y, linkinv_inv_gaussian(eta, link), 
##                              aux, sum_log_y, sqrt_y);
##     }
##     else if (family == 4 && link_phi == 0) {
##       vector[N] mu;
##       mu = linkinv_beta(eta, link);
##       target += beta_lpdf(y | mu * aux, (1 - mu) * aux);
##     }
##     else if (family == 4 && link_phi > 0) {
##       vector[N] mu;
##       vector[N] mu_z;
##       mu = linkinv_beta(eta, link);
##       mu_z = linkinv_beta_z(eta_z, link_phi);
##       target += beta_lpdf(y | rows_dot_product(mu, mu_z), 
##                           rows_dot_product((1 - mu) , mu_z));
##     }
##   }
##   else if (prior_PD == 0) { // weighted log-likelihoods
##     vector[N] summands;
##     if (family == 1) summands = pw_gauss(y, eta, aux, link);
##     else if (family == 2) summands = pw_gamma(y, eta, aux, link);
##     else if (family == 3) summands = pw_inv_gaussian(y, eta, aux, link, log_y, sqrt_y);
##     else if (family == 4 && link_phi == 0) summands = pw_beta(y, eta, aux, link);
##     else if (family == 4 && link_phi > 0) summands = pw_beta_z(y, eta, eta_z, link, link_phi);
##     target += dot_product(weights, summands);
##   }
## 
##   // Log-priors
##   if (prior_dist_for_aux > 0 && prior_scale_for_aux > 0) {
##     real log_half = -0.693147180559945286;
##     if (prior_dist_for_aux == 1)
##       target += normal_lpdf(aux_unscaled | 0, 1) - log_half;
##     else if (prior_dist_for_aux == 2)
##       target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half;
##     else 
##      target += exponential_lpdf(aux_unscaled | 1);
##   }
##     
## #include /model/priors_glm.stan
## #include /model/priors_betareg.stan
##   if (t > 0) decov_lp(z_b, z_T, rho, zeta, tau, 
##                       regularization, delta, shape, t, p);
## }
## generated quantities {
##   real alpha[has_intercept];
##   real omega_int[has_intercept_z];
##   real mean_PPD = 0;
##   if (has_intercept == 1) {
##     if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta);
##     else alpha[1] = gamma[1];
##   }
##   if (has_intercept_z == 1) { 
##     omega_int[1] = gamma_z[1] - dot_product(zbar, omega);  // adjust betareg intercept 
##   }
##   {
##     vector[N] eta_z;
## #include /model/make_eta.stan
##     if (t > 0) {
## #include /model/eta_add_Zb.stan
##     }
##     if (has_intercept == 1) {
##       if (make_lower(family,link) == negative_infinity() &&
##           make_upper(family,link) == positive_infinity()) eta = eta + gamma[1];
##       else if (family == 4 && link == 5) {
##         real max_eta = max(eta);
##         alpha[1] = alpha[1] - max_eta;
##         eta = eta - max_eta + gamma[1];
##       }
##       else {
##         real min_eta = min(eta);
##         alpha[1] = alpha[1] - min_eta;
##         eta = eta - min_eta + gamma[1];
##       }
##     }
##     else {
## #include /model/eta_no_intercept.stan
##     }
## #include /model/make_eta_z.stan
##     // adjust eta_z according to links
##     if (has_intercept_z == 1) {
##       if (link_phi > 1) {
##         omega_int[1] = omega_int[1] - min(eta_z);
##         eta_z = eta_z - min(eta_z) + gamma_z[1];
##       }
##       else {
##         eta_z = eta_z + gamma_z[1];
##       }
##     }
##     else { // has_intercept_z == 0
## #include /model/eta_z_no_intercept.stan
##     }
##     
##     if (SSfun > 0) { // nlmer
##       vector[len_y] eta_nlmer;
##       matrix[len_y, K] P;      
##       P = reshape_vec(eta, len_y, K);
##       if (SSfun < 5) {
##         if (SSfun <= 2) {
##           if (SSfun == 1) eta_nlmer = SS_asymp(input, P);
##           else eta_nlmer = SS_asympOff(input, P);
##         }
##         else if (SSfun == 3) eta_nlmer = SS_asympOrig(input, P);
##         else eta_nlmer = SS_biexp(input, P);
##       }
##       else {
##         if (SSfun <= 7) {
##           if (SSfun == 5) eta_nlmer = SS_fol(Dose, input, P);
##           else if (SSfun == 6) eta_nlmer = SS_fpl(input, P);
##           else eta_nlmer = SS_gompertz(input, P);
##         }
##         else {
##           if (SSfun == 8) eta_nlmer = SS_logis(input, P);
##           else if (SSfun == 9) eta_nlmer = SS_micmen(input, P);
##           else eta_nlmer = SS_weibull(input, P);
##         }
##       }
##       for (n in 1:len_y) mean_PPD = mean_PPD + normal_rng(eta_nlmer[n], aux);
##     }
##     else if (family == 1) {
##       if (link > 1) eta = linkinv_gauss(eta, link);
##       for (n in 1:len_y) mean_PPD = mean_PPD + normal_rng(eta[n], aux);
##     }
##     else if (family == 2) {
##       if (link > 1) eta = linkinv_gamma(eta, link);
##       for (n in 1:len_y) mean_PPD = mean_PPD + gamma_rng(aux, aux / eta[n]);
##     }
##     else if (family == 3) {
##       if (link > 1) eta = linkinv_inv_gaussian(eta, link);
##       for (n in 1:len_y) mean_PPD = mean_PPD + inv_gaussian_rng(eta[n], aux);
##     }
##     else if (family == 4 && link_phi == 0) { 
##       eta = linkinv_beta(eta, link);
##       for (n in 1:N) {
##         real eta_n = eta[n];
##         if (aux <= 0) mean_PPD = mean_PPD + bernoulli_rng(0.5);
##         else if (eta_n >= 1) mean_PPD = mean_PPD + 1;
##         else if (eta_n > 0)
##           mean_PPD = mean_PPD + beta_rng(eta[n] * aux, (1 - eta[n]) * aux);
##       }
##     }
##     else if (family == 4 && link_phi > 0) {
##       eta = linkinv_beta(eta, link);
##       eta_z = linkinv_beta_z(eta_z, link_phi);
##       for (n in 1:N) {
##         real eta_n = eta[n];
##         real aux_n = eta_z[n];
##         if (aux_n <= 0) mean_PPD = mean_PPD + bernoulli_rng(0.5);
##         else if (eta_n >= 1) mean_PPD = mean_PPD + 1;
##         else if (eta_n > 0)
##           mean_PPD = mean_PPD + beta_rng(eta_n * aux_n, (1 - eta_n) * aux_n);
##       }
##     }
##     mean_PPD = mean_PPD / len_y;
##   }
## }

The prior specification can be examined as follows.

rstanarm::prior_summary(fit_rstanarm)

## Priors for model 'fit_rstanarm' 
## ------
## Intercept (after predictors centered)
##  ~ normal(location = 178, scale = 20)
## 
## Auxiliary (sigma)
##  ~ exponential(rate = 25)
##      **adjusted scale = 0.31 (adjusted rate = 1/adjusted scale)
## ------
## See help('prior_summary.stanreg') for more details

Plot to obtain a posterior figure.

plot(fit_rstanarm)

Various plots can be used for diagnostics.
https://www.rdocumentation.org/packages/rstanarm/versions/2.10.1/topics/rstanarm-plots

## Posterior intervals and point estimates
stan_plot(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Traceplots
stan_trace(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Histograms
stan_hist(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Kernel density estimates
stan_dens(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Scatterplots
stan_scat(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Diagnostics for Hamiltonian Monte Carlo and the No-U-Turn Sampler
stan_diag(fit_rstanarm)

## Rhat
stan_rhat(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Ratio of effective sample size to total posterior sample size
stan_ess(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Ratio of Monte Carlo standard error to posterior standard deviation
stan_mcse(fit_rstanarm, pars = c("(Intercept)","sigma"))

## Autocorrelation
stan_ac(fit_rstanarm, pars = c("(Intercept)","sigma"))

The results are summarized as follows.

summary(fit_rstanarm)

## 
## Model Info:
## 
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      height ~ 1
##  algorithm:    sampling
##  priors:       see help('prior_summary')
##  sample:       4000 (posterior sample size)
##  observations: 352
##  predictors:   1
## 
## Estimates:
##                 mean    sd      2.5%    25%     50%     75%     97.5%
## (Intercept)     154.6     0.4   153.8   154.3   154.6   154.9   155.4
## sigma             7.5     0.3     7.0     7.3     7.5     7.7     8.1
## mean_PPD        154.6     0.6   153.5   154.2   154.6   155.0   155.7
## log-posterior -1246.4     1.0 -1248.8 -1246.8 -1246.1 -1245.7 -1245.4
## 
## Diagnostics:
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  2547 
## sigma         0.0  1.0  2580 
## mean_PPD      0.0  1.0  3093 
## log-posterior 0.0  1.0  1746 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

stan gender model

Gaussian model for height: \(y_{i} \sim N(\mu_{i}, \sigma^{2})\)
Mean model: \(\mu_{i} = \mu_{0} + \delta MALE_{i}\)
Prior for female mean parameter: \(\mu_{0} \sim N(170, 20)\)
Prior for sex difference parameter. \(\delta \sim N(10, 10)\)
Prior for SD parameter: \(\sigma \sim Exponential(25)\)
Prior predictive density of the outcome is the prior belief expressed in terms of the outcome variable distribution.

\(f(y) = \int f(y|\mu_{0},\delta,\sigma) g_{1}(\mu_{{0}}) g_{2}(\delta) g_{3}(\sigma) \text{d}\mu_{0} \text{d}\delta \text{d}\sigma\)

n_prior_sample <- 10^4
mu0_mean  <- 170
mu0_sd <- 20
delta_mean  <- 10
delta_sd <- 10
sigma_exp <- 25
p_male <- 0.5
## Independence assumed. OK to sample separately.
prior_sample <- data_frame(male = rbinom(n_prior_sample, size = 1, prob = p_male),
                           mu0 = rnorm(n_prior_sample, mu0_mean, mu0_sd),
                           delta = rnorm(n_prior_sample, delta_mean, delta_sd),
                           mu_i = mu0 + delta * male,
                           sigma = rexp(n_prior_sample, sigma_exp)) %>%
    ## Sample from the prior distribution of heights.
    mutate(prior_y = rnorm(n_prior_sample, mu_i, sigma))
prior_sample

## # A tibble: 10,000 x 6
##     male   mu0   delta  mu_i    sigma prior_y
##    <int> <dbl>   <dbl> <dbl>    <dbl>   <dbl>
##  1     0  173.  -2.82   173. 0.0267      173.
##  2     0  144.  26.4    144. 0.000673    144.
##  3     1  134.   0.895  135. 0.0759      135.
##  4     1  157.   4.44   162. 0.00199     162.
##  5     1  182.   8.24   190. 0.0454      190.
##  6     0  186. -19.8    186. 0.0335      186.
##  7     0  172.   8.34   172. 0.00865     172.
##  8     0  135.  -1.89   135. 0.0501      135.
##  9     1  174.  12.6    186. 0.0206      186.
## 10     1  183.   6.78   190. 0.00459     190.
## # ... with 9,990 more rows

## Plot
ggplot(data = prior_sample, mapping = aes(x = prior_y)) +
    geom_density() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

The prior for the sex difference is likely too vague to create a bimodal prior predictive distribution of heights..

The model stated above can be coded as follows in stan.

print(system("cat ./height_sex.stan", intern = TRUE), quote = FALSE)

##  [1] data {                                                             
##  [2]     /* Hyperparameters are given as data */                        
##  [3]     real mu0_mean;                                                 
##  [4]     real mu0_sd;                                                   
##  [5]     real delta_mean;                                               
##  [6]     real delta_sd;                                                 
##  [7]     real<lower=0> sigma_exp;                                       
##  [8]                                                                    
##  [9]     /* Define variables in data */                                 
## [10]     /* Number of observations (an integer) */                      
## [11]     int<lower=1> n;                                                
## [12]     /* Male sex indicator */                                       
## [13]     int<lower=0, upper=1> male[n];                                 
## [14]     /* Outcome (a real vector of length n) */                      
## [15]     real y[n];                                                     
## [16] }                                                                  
## [17]                                                                    
## [18]                                                                    
## [19] parameters {                                                       
## [20]     /* Define parameters to estimate */                            
## [21]     /* Female population mean (a real number) */                   
## [22]     real mu0;                                                      
## [23]     /* Sex difference */                                           
## [24]     real delta;                                                    
## [25]     /* Population SD (a positive real number) */                   
## [26]     real<lower=0> sigma;                                           
## [27] }                                                                  
## [28]                                                                    
## [29]                                                                    
## [30] model {                                                            
## [31]     /* Prior part of Bayesian inference */                         
## [32]     /* Flat prior for mu (no need to specify if non-informative) */
## [33]     mu0 ~ normal(mu0_mean, mu0_sd);                                
## [34]     delta ~ normal(delta_mean, delta_sd);                          
## [35]     sigma ~ exponential(sigma_exp);                                
## [36]                                                                    
## [37]                                                                    
## [38]     /* Likelihood part of Bayesian inference */                    
## [39]     for (i in 1:n) {                                               
## [40]         y[i] ~ normal(mu0 + delta*male[i], sigma);                 
## [41]     }                                                              
## [42] }

Fitting can be conducted using the stan function in the rstan package.

fit_sex <- rstan::stan(file = "./height_sex.stan",
                       data = list(n = nrow(Howell1),
                                   male = Howell1$male,
                                   y = Howell1$height,
                                   ## Hyperparameter values
                                   mu0_mean = 170,
                                   mu0_sd = 20,
                                   delta_mean = 10,
                                   delta_sd = 10,
                                   sigma_exp = 25),
                       iter = 1000, chains = 4)

## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 1).
## 
## Gradient evaluation took 0.00014 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 1.4 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.961099 seconds (Warm-up)
##                0.484578 seconds (Sampling)
##                1.44568 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 2).
## 
## Gradient evaluation took 0.000207 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 2.07 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.994068 seconds (Warm-up)
##                0.599564 seconds (Sampling)
##                1.59363 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 3).
## 
## Gradient evaluation took 0.000178 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 1.78 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.978648 seconds (Warm-up)
##                0.537336 seconds (Sampling)
##                1.51598 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'height_sex' NOW (CHAIN 4).
## 
## Gradient evaluation took 0.000175 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 1.75 seconds.
## Adjust your expectations accordingly!
## 
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## 
##  Elapsed Time: 0.833791 seconds (Warm-up)
##                0.43968 seconds (Sampling)
##                1.27347 seconds (Total)

The shinystan package provides interactive diagnostics (not run here).

shinystan::launch_shinystan(fit_sex)

Show results. The effective sample size increased a lot after changing the prior from Uniform(0,50) to Exponential(25).

fit_sex

## Inference for Stan model: height_sex.
## 4 chains, each with iter=1000; warmup=500; thin=1; 
## post-warmup draws per chain=500, total post-warmup draws=2000.
## 
##          mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu0    149.52    0.01 0.35  148.83  149.29  149.52  149.74  150.21  1182    1
## delta   10.85    0.02 0.51    9.82   10.52   10.85   11.20   11.86  1078    1
## sigma    4.79    0.00 0.15    4.51    4.68    4.79    4.89    5.10  1165    1
## lp__  -905.51    0.05 1.27 -908.77 -906.09 -905.17 -904.56 -904.08   764    1
## 
## Samples were drawn using NUTS(diag_e) at Sat Oct  6 05:24:52 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Show the summary results.

summary(fit_sex)

## $summary
##              mean     se_mean        sd        2.5%         25%         50%        75%       97.5%     n_eff
## mu0    149.519540 0.010322366 0.3549153  148.826641  149.290095  149.521218  149.74463  150.214240 1182.1997
## delta   10.854938 0.015643569 0.5135080    9.818063   10.522748   10.846471   11.19851   11.863126 1077.5135
## sigma    4.789515 0.004439313 0.1515337    4.507359    4.679808    4.788478    4.88773    5.100379 1165.1636
## lp__  -905.505759 0.045886572 1.2680662 -908.772781 -906.090092 -905.169241 -904.56057 -904.081981  763.6822
##           Rhat
## mu0   1.003617
## delta 1.002176
## sigma 1.001174
## lp__  1.000692
## 
## $c_summary
## , , chains = chain:1
## 
##          stats
## parameter        mean        sd        2.5%         25%         50%         75%      97.5%
##     mu0    149.493763 0.3629291  148.798109  149.262765  149.514870  149.730336  150.18389
##     delta   10.875604 0.5099152    9.946888   10.520426   10.870530   11.209234   11.90917
##     sigma    4.779234 0.1401082    4.499962    4.678117    4.792342    4.866212    5.08628
##     lp__  -905.455840 1.2637180 -908.597826 -906.030081 -905.152537 -904.512547 -904.08615
## 
## , , chains = chain:2
## 
##          stats
## parameter        mean        sd        2.5%        25%         50%         75%       97.5%
##     mu0    149.512593 0.3490029  148.810404  149.28173  149.502894  149.751901  150.205716
##     delta   10.876287 0.5053849    9.884155   10.58052   10.854030   11.202886   11.923446
##     sigma    4.788163 0.1493261    4.511496    4.68579    4.787635    4.882518    5.089354
##     lp__  -905.490276 1.3280657 -908.909259 -905.99451 -905.119389 -904.519608 -904.062561
## 
## , , chains = chain:3
## 
##          stats
## parameter        mean        sd        2.5%         25%         50%         75%       97.5%
##     mu0    149.526310 0.3549025  148.842844  149.287584  149.527200  149.729970  150.306370
##     delta   10.836653 0.5029623    9.704061   10.520100   10.834727   11.165013   11.745060
##     sigma    4.783739 0.1616324    4.495947    4.667861    4.776697    4.899764    5.097192
##     lp__  -905.535855 1.2235567 -908.474618 -906.072624 -905.223587 -904.631003 -904.097277
## 
## , , chains = chain:4
## 
##          stats
## parameter        mean        sd        2.5%         25%         50%        75%       97.5%
##     mu0    149.545494 0.3517175  148.824104  149.334051  149.543955  149.76536  150.218700
##     delta   10.831207 0.5349435    9.798185   10.501684   10.824102   11.20996   11.852644
##     sigma    4.806924 0.1532626    4.522657    4.695614    4.793949    4.91300    5.118884
##     lp__  -905.541064 1.2565465 -908.744018 -906.187152 -905.223437 -904.56647 -904.084770

Show traceplots.

rstan::traceplot(fit_sex)

Get posterior values.

df_fit_sex <- tidybayes::tidy_draws(fit_sex)
df_fit_sex

## # A tibble: 2,000 x 7
##    .chain .iteration .draw   mu0 delta sigma  lp__
##     <int>      <int> <int> <dbl> <dbl> <dbl> <dbl>
##  1      1          1     1  150. 10.6   4.87 -904.
##  2      1          2     2  150. 10.8   4.93 -906.
##  3      1          3     3  150.  9.92  4.75 -906.
##  4      1          4     4  149. 11.2   4.79 -904.
##  5      1          5     5  149. 10.8   4.92 -905.
##  6      1          6     6  150. 10.7   4.94 -905.
##  7      1          7     7  149. 11.6   4.77 -907.
##  8      1          8     8  149. 11.5   4.75 -907.
##  9      1          9     9  150. 10.4   4.65 -906.
## 10      1         10    10  150. 10.3   4.69 -905.
## # ... with 1,990 more rows

Show a 2D traceplot.

ggplot(data = df_fit_sex, mapping = aes(x = mu0, y = sigma, group = .chain, color = factor(.chain))) +
    geom_path(size = 0.1) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

Posterior distribution of heights. Our unimodal model seems somewhat different from the observed data.

df_fit_sex %>%
    mutate(male = rbinom(length(mu0), 1, mean(Howell1$male)),
           height = rnorm(length(mu0), mu0 + delta*male, sigma)) %>%
    ggplot(mapping = aes(x = height)) +
    geom_density() +
    geom_density(data = Howell1, color = "red") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

This captures the bimodal nature of the observed data much better than the previous overall mean model.

Posterior probability of \(\mu0 > 155\)

df_fit_sex %>%
    summarize(`P[mu0 > 155]` = mean(mu0 > 155),
              `P[mu0+delta > 155]` = mean(mu0+delta > 155))

## # A tibble: 1 x 2
##   `P[mu0 > 155]` `P[mu0+delta > 155]`
##            <dbl>                <dbl>
## 1              0                    1

Various plots can be used for diagnostics.
https://www.rdocumentation.org/packages/rstanarm/versions/2.10.1/topics/rstanarm-plots

## Posterior intervals and point estimates
stan_plot(fit_sex, pars = c("mu0","delta","sigma"))

## Traceplots
stan_trace(fit_sex, pars = c("mu0","delta","sigma"))

## Histograms
stan_hist(fit_sex, pars = c("mu0","delta","sigma"))

## Kernel density estimates
stan_dens(fit_sex, pars = c("mu0","delta","sigma"))

## Scatterplots
stan_scat(fit_sex, pars = c("mu0","delta","sigma"))

## Error: 'pars' must contain exactly two parameter names

## Diagnostics for Hamiltonian Monte Carlo and the No-U-Turn Sampler
stan_diag(fit_sex)

## Rhat
stan_rhat(fit_sex)

## Ratio of effective sample size to total posterior sample size
stan_ess(fit_sex)

## Ratio of Monte Carlo standard error to posterior standard deviation
stan_mcse(fit_sex)

## Autocorrelation
stan_ac(fit_sex)

rstanarm sex model

fit_rstanarm_sex <- rstanarm::stan_glm(formula = height ~ male,
                                       family = gaussian(),
                                       data = Howell1,
                                       prior_intercept = normal(170, 20, autoscale = FALSE),
                                       prior = normal(10, 10, autoscale = FALSE),
                                       prior_aux = exponential(25))

Show prior specifications.

rstanarm::prior_summary(fit_rstanarm_sex)

## Priors for model 'fit_rstanarm_sex' 
## ------
## Intercept (after predictors centered)
##  ~ normal(location = 170, scale = 20)
## 
## Coefficients
##  ~ normal(location = 10, scale = 10)
## 
## Auxiliary (sigma)
##  ~ exponential(rate = 25)
##      **adjusted scale = 0.31 (adjusted rate = 1/adjusted scale)
## ------
## See help('prior_summary.stanreg') for more details

Show summary.

summary(fit_rstanarm_sex)

## 
## Model Info:
## 
##  function:     stan_glm
##  family:       gaussian [identity]
##  formula:      height ~ male
##  algorithm:    sampling
##  priors:       see help('prior_summary')
##  sample:       4000 (posterior sample size)
##  observations: 352
##  predictors:   2
## 
## Estimates:
##                 mean    sd      2.5%    25%     50%     75%     97.5%
## (Intercept)     149.5     0.4   148.7   149.3   149.5   149.8   150.3
## male             10.8     0.6     9.6    10.4    10.8    11.2    12.0
## sigma             5.4     0.2     5.0     5.3     5.4     5.6     5.8
## mean_PPD        154.6     0.4   153.8   154.3   154.6   154.9   155.4
## log-posterior -1122.3     1.3 -1125.6 -1122.9 -1122.0 -1121.4 -1120.9
## 
## Diagnostics:
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  4000 
## male          0.0  1.0  4000 
## sigma         0.0  1.0  4000 
## mean_PPD      0.0  1.0  4000 
## log-posterior 0.0  1.0  1592 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

Plot posteriors

plot(fit_rstanarm_sex)

The trace plots show good mixing.

stan_trace(fit_rstanarm_sex)

Diagnostic plots.

stan_diag(fit_rstanarm_sex)

## Warning: Removed 1 rows containing missing values (geom_bar).

rstanarm age sex model

fit_rstanarm_age_sex <- rstanarm::stan_glm(formula = height ~ age*male,
                                              family = gaussian(),
                                              data = Howell1,
                                              prior_intercept = normal(178, 20, autoscale = FALSE),
                                              prior = normal(10, 10, autoscale = FALSE),
                                              prior_aux = exponential(25))