Exercise 6.17 (MCMC with RStan: Normal-Normal)

#Load packages
library(rstan)

## Loading required package: StanHeaders

## 
## rstan version 2.32.6 (Stan version 2.32.2)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)

## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks rstan::extract()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(bayesplot)

## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting

library(ggplot2)
library(patchwork)
library(modeest)

#a)Simulate the posterior model of μ
# STEP 1: DEFINE the model
nn_model <- "
  data {
    real Y[4];
  }
  parameters {
    real  mu;
  }
  model {
    Y ~ normal(mu,1.3);
    mu ~ normal(10, 1.2);
  }
"

# STEP 2: SIMULATE the posterior
nn_sim <- stan(model_code = nn_model, data = list(Y = c(7.1,8.9,8.4,8.6)), 
               chains = 4, iter = 5000*2, seed = 84131)

## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 1.6e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.034 seconds (Warm-up)
## Chain 1:                0.037 seconds (Sampling)
## Chain 1:                0.071 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 3e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.036 seconds (Warm-up)
## Chain 2:                0.04 seconds (Sampling)
## Chain 2:                0.076 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 3e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.035 seconds (Warm-up)
## Chain 3:                0.037 seconds (Sampling)
## Chain 3:                0.072 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 3e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.036 seconds (Warm-up)
## Chain 4:                0.041 seconds (Sampling)
## Chain 4:                0.077 seconds (Total)
## Chain 4:

as.array(nn_sim, pars = "mu") %>% 
  head()

## , , parameters = mu
## 
##           chains
## iterations  chain:1  chain:2  chain:3  chain:4
##       [1,] 8.012745 8.996135 9.054883 8.899130
##       [2,] 8.010866 8.539926 7.767423 8.323123
##       [3,] 8.134628 8.937379 7.513166 8.132565
##       [4,] 8.844287 8.597229 7.703219 8.600219
##       [5,] 7.863144 8.826715 7.646352 8.600219
##       [6,] 8.718994 8.284187 8.748467 8.426334

#b)trace and density plots
# Trace plots of the 4 Markov chains
mcmc_trace(nn_sim, pars = "mu", size = 0.1) +
  labs (x='Iteration',y='mu value',
        title='Trace plot - mu')

# Histogram and density of the Markov chain values

h<-mcmc_hist(nn_sim, pars = "mu")  +
  labs (x='mu',y='count',
        title='Histogram of the Markov chain values')
d<-mcmc_dens(nn_sim, pars = "mu") + 
  labs (x='mu',y='density',
        title='Density: Markov chain values') 
h+d

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#c)the most posterior plausible value of μ
summary(nn_sim)

## $summary
##           mean     se_mean        sd      2.5%       25%       50%       75%
## mu    8.636413 0.006435644 0.5675020  7.531818  8.252315  8.637980  9.016748
## lp__ -1.874426 0.007526076 0.7024246 -3.868765 -2.033714 -1.605152 -1.431223
##          97.5%    n_eff      Rhat
## mu    9.746576 7775.904 0.9999822
## lp__ -1.381834 8710.883 1.0001383
## 
## $c_summary
## , , chains = chain:1
## 
##          stats
## parameter      mean        sd      2.5%       25%       50%       75%     97.5%
##      mu    8.625290 0.5703317  7.514840  8.234343  8.627903  9.007937  9.732649
##      lp__ -1.879828 0.6957723 -3.859101 -2.035046 -1.613331 -1.434040 -1.381887
## 
## , , chains = chain:2
## 
##          stats
## parameter      mean        sd      2.5%       25%       50%       75%     97.5%
##      mu    8.641993 0.5672038  7.572678  8.250896  8.646806  9.029291  9.749062
##      lp__ -1.873701 0.6894157 -3.809574 -2.043093 -1.611479 -1.432305 -1.381834
## 
## , , chains = chain:3
## 
##          stats
## parameter      mean        sd      2.5%       25%       50%       75%     97.5%
##      mu    8.640853 0.5736780  7.508682  8.259275  8.637721  9.019350  9.777705
##      lp__ -1.885024 0.7375174 -3.979550 -2.043681 -1.601690 -1.425698 -1.381836
## 
## , , chains = chain:4
## 
##          stats
## parameter      mean        sd      2.5%       25%       50%       75%     97.5%
##      mu    8.637516 0.5587011  7.544987  8.265272  8.636673  9.011853  9.728482
##      lp__ -1.859150 0.6857091 -3.849954 -2.008353 -1.593002 -1.433194 -1.381780

#d)Specify the posterior model of μ. How does your MCMC approximation compare? 

# Histogram and density of the Markov chain values, mean and mode (posterior model)
#reorder data
chains<- data.frame(as.array(nn_sim, pars = "mu"))
vchains<-c(chains[,1],chains[,2],chains[,3],chains[,4])
dchains<-data.frame(vchains)

# Histogram of the Markov chain values
hist(dchains$vchains, 
     prob = TRUE,
     xlim = c(6, 11.5),
     col = "royalblue3",
     border = "blue",
     main = "Histogram and Density of MCMC values - Mode and Mean of Posterior", 
     xlab = "mu", 
     las = 1) 

# Density of the Markov chain values
lines(density(dchains$vchains), # density plot
      lwd = 2, # thickness of line
      col = "black")

# Mean (posteriori model)
segments(x0 = 8.647, y0 = 0, x1 = 8.647, 
         y1 = dnorm(8.647,8.647,0.5715), 
         col = "red", lwd = 2)

# Legend
legend(x ="topright", # Ubicación de la leyenda
       c("Density of MCMC values","Mean of Posterior"),
       col = c("black", "red"),
       lwd = c(1, 1, 1),
       bty = "n")

#Density of the Markov chain values and density Posterior, mean and mode (posterior model)

# Density posterior moodel Normal(8.647,0.5715^2)
x = 6:11.5
curve(dnorm(x, mean=8.647, sd=0.5715),6,11.5, # density plot
      xlab = "mu",
      ylab = "density",
      main = "Density of MCMC values and Posterior - Mode and Mean of Posterior", 
      lwd = 5, # thickness of line
      col = "orange")

# Density of the Markov chain values
lines(density(dchains$vchains), # density plot
      lwd = 2, # thickness of line
      col = "black")

# Mean (posteriori model)
segments(x0 = 8.647, y0 = 0, x1 = 8.647, 
         y1 = dnorm(8.647,8.647,0.5715), 
         col = "red", lwd = 2)

# Legend
legend(x ="topright", # Ubicación de la leyenda
       c("Density of Posterior","Density of MCMC values", "Mean of Posterior"),
       col = c("orange","black", "red"),
       lwd = c(1,1, 1, 1),
       bty = "n")

#Density plot of the Markov chain values and posterior density with mcmc_dens and stat_function
mcmc_dens(nn_sim, pars = "mu") + 
  labs (x='mu',y='density',
        title='Density of MCMC values and Posterior') +
  stat_function (
    fun=dnorm,
    args=list(mean=8.647,sd=0.5715),
    lwd = 1,
    color="black"
  )

Exercise 6.17 (MCMC with RStan: Normal-Normal)

Sebastián Grimberg