This answer sheet is my solutions to exercises from “An Introduction to Bayesian Data Analysis for Cognitive Science” by Bruno Nicenboim, Daniel J. Schad, and Shravan Vasishth, demonstrating my proficiency in R programming. Chapters are separated into different files.
I welcome corrections for any mistakes and am open to discussions to further enhance understanding and application of these concepts.
Click here to access the book: An Introduction to Bayesian Data Analysis
Set up
#Required packages
library(brms)
library(ggplot2)
library(bayesplot)
library(dplyr)
library(rstan)
library(gridExtra)
#Set seed
set.seed(522025)
Generate some simulated independent and identically distributed data with n=100 data points as follows: y <- rnorm(100, mean = 500, sd = 50)
Next, fit a simple linear model with a normal likelihood: \[y_{n} \sim \text{Normal}(\mu, \sigma)\] Specify the following priors: \[\mu \sim \text{Uniform}(0, 60000)\] \[\sigma \sim \text{Uniform}(0, 2000)\] Generate posterior predictive distributions of the parameters and check that the true values of the parameters \(μ=500\),\(σ=50\) are recovered by the model. What this means is that you should check whether these true values lie within the range of the posterior distributions of the two parameters. This is a good sanity check for finding out whether a model can in principle recover the true parameter values correctly. —
#True parameters
mean_3.1 <- 500 #this is intercept
sd_3.1 <- 50
#Generate data independent and identically distributed data. I rename the variable to follow my documentation in this file.
data_3.1 <- rnorm(100, mean = mean_3.1, sd = sd_3.1)
df_3.1 <- data.frame(y = data_3.1) #convert to data frame
#Fit a simple linear model with a normal likelihood
suppressMessages(suppressWarnings(fit_press_3.1 <-
brm(y ~ 1,
data = df_3.1,
family = gaussian(),
prior = c(prior(uniform(0, 60000),
class = Intercept,
lb = 0, ub = 60000),
prior(uniform(0, 2000),
class = sigma,
lb = 0, ub = 2000)),
chain = 4, iter = 2000, warmup = 1000))) #this means final samples used for inference is iteration - warm up (for high divergences)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.0.40.1)’
## using SDK: ‘MacOSX14.0.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 5.4e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.54 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.024 seconds (Warm-up)
## Chain 1: 0.02 seconds (Sampling)
## Chain 1: 0.044 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.024 seconds (Warm-up)
## Chain 2: 0.02 seconds (Sampling)
## Chain 2: 0.044 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.024 seconds (Warm-up)
## Chain 3: 0.02 seconds (Sampling)
## Chain 3: 0.044 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 7e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.023 seconds (Warm-up)
## Chain 4: 0.022 seconds (Sampling)
## Chain 4: 0.045 seconds (Total)
## Chain 4:
summary(fit_press_3.1)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: y ~ 1
## Data: df_3.1 (Number of observations: 100)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 501.24 4.96 491.33 511.18 1.00 3579 2457
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 47.56 3.42 41.46 55.08 1.00 3675 2592
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#Check recovery
post_samples_3.1 <- as_draws_df(fit_press_3.1) #extracting samples
mean_CI_3.1 <- post_samples_3.1$b_Intercept %>% quantile(c(0.025, 0.975)) #95% CI for mean
sd_CI_3.1 <- post_samples_3.1$sigma %>% quantile(c(0.025, 0.975)) #95% CI for standard deviation
#Do true values lie inside the 95% credible intervals?
mean_in_CI_3.1 <- (mean_3.1 >= mean_CI_3.1[1]) & (mean_3.1 >= mean_CI_3.1[2])
sd_in_CI_3.1 <- (sd_3.1 >= sd_CI_3.1[1]) & (sd_3.1 >= sd_CI_3.1[2])
#plot posteriors
plot(fit_press_3.1)
#posterior predictive check
print(pp_check(fit_press_3.1, ndraws = 100, type = "dens_overlay") +
ggtitle ("Posterior Predictive Check : Density Overlay"))
#plot posterior predictive distribution of statistical summaries (mean)
print(pp_check(fit_press_3.1, ndraws = 100, type = "stat", stat = "mean") +
ggtitle("Posterior Predictive Check : Mean"))
#plot prior predictive distribution of statistical summaries (mean)
print(pp_check(fit_press_3.1, ndraws = 100, type = "stat", stat = "mean", prefix = "ppd") +
ggtitle("Prior Predictive Check : Mean"))
Fit the model fit_press with just a few iterations, say 50 iterations (set warmup to the default of 25, and use four chains). Does the model converge?
Exercise 3.2 A simple linear model - A
#Fitting the model with a few iterations
suppressMessages(suppressWarnings(
invisible(fit_press_3.2 <-
brm(y ~ 1,
data = df_3.1, #reuse data from exercise 3.1 (previous chunk must be run before running this chunk)
family = gaussian(),
prior = c(prior(uniform(0, 60000),
class = Intercept,
lb = 0, ub = 60000),
prior(uniform(0, 2000),
class = sigma,
lb = 0, ub = 2000)),
chain = 4, iter = 50, warmup = 25))))
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.0.40.1)’
## using SDK: ‘MacOSX14.0.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 2.7e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: WARNING: There aren't enough warmup iterations to fit the
## Chain 1: three stages of adaptation as currently configured.
## Chain 1: Reducing each adaptation stage to 15%/75%/10% of
## Chain 1: the given number of warmup iterations:
## Chain 1: init_buffer = 3
## Chain 1: adapt_window = 20
## Chain 1: term_buffer = 2
## Chain 1:
## Chain 1: Iteration: 1 / 50 [ 2%] (Warmup)
## Chain 1: Iteration: 5 / 50 [ 10%] (Warmup)
## Chain 1: Iteration: 10 / 50 [ 20%] (Warmup)
## Chain 1: Iteration: 15 / 50 [ 30%] (Warmup)
## Chain 1: Iteration: 20 / 50 [ 40%] (Warmup)
## Chain 1: Iteration: 25 / 50 [ 50%] (Warmup)
## Chain 1: Iteration: 26 / 50 [ 52%] (Sampling)
## Chain 1: Iteration: 30 / 50 [ 60%] (Sampling)
## Chain 1: Iteration: 35 / 50 [ 70%] (Sampling)
## Chain 1: Iteration: 40 / 50 [ 80%] (Sampling)
## Chain 1: Iteration: 45 / 50 [ 90%] (Sampling)
## Chain 1: Iteration: 50 / 50 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0 seconds (Warm-up)
## Chain 1: 0 seconds (Sampling)
## Chain 1: 0 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: WARNING: There aren't enough warmup iterations to fit the
## Chain 2: three stages of adaptation as currently configured.
## Chain 2: Reducing each adaptation stage to 15%/75%/10% of
## Chain 2: the given number of warmup iterations:
## Chain 2: init_buffer = 3
## Chain 2: adapt_window = 20
## Chain 2: term_buffer = 2
## Chain 2:
## Chain 2: Iteration: 1 / 50 [ 2%] (Warmup)
## Chain 2: Iteration: 5 / 50 [ 10%] (Warmup)
## Chain 2: Iteration: 10 / 50 [ 20%] (Warmup)
## Chain 2: Iteration: 15 / 50 [ 30%] (Warmup)
## Chain 2: Iteration: 20 / 50 [ 40%] (Warmup)
## Chain 2: Iteration: 25 / 50 [ 50%] (Warmup)
## Chain 2: Iteration: 26 / 50 [ 52%] (Sampling)
## Chain 2: Iteration: 30 / 50 [ 60%] (Sampling)
## Chain 2: Iteration: 35 / 50 [ 70%] (Sampling)
## Chain 2: Iteration: 40 / 50 [ 80%] (Sampling)
## Chain 2: Iteration: 45 / 50 [ 90%] (Sampling)
## Chain 2: Iteration: 50 / 50 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0 seconds (Warm-up)
## Chain 2: 0 seconds (Sampling)
## Chain 2: 0 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: WARNING: There aren't enough warmup iterations to fit the
## Chain 3: three stages of adaptation as currently configured.
## Chain 3: Reducing each adaptation stage to 15%/75%/10% of
## Chain 3: the given number of warmup iterations:
## Chain 3: init_buffer = 3
## Chain 3: adapt_window = 20
## Chain 3: term_buffer = 2
## Chain 3:
## Chain 3: Iteration: 1 / 50 [ 2%] (Warmup)
## Chain 3: Iteration: 5 / 50 [ 10%] (Warmup)
## Chain 3: Iteration: 10 / 50 [ 20%] (Warmup)
## Chain 3: Iteration: 15 / 50 [ 30%] (Warmup)
## Chain 3: Iteration: 20 / 50 [ 40%] (Warmup)
## Chain 3: Iteration: 25 / 50 [ 50%] (Warmup)
## Chain 3: Iteration: 26 / 50 [ 52%] (Sampling)
## Chain 3: Iteration: 30 / 50 [ 60%] (Sampling)
## Chain 3: Iteration: 35 / 50 [ 70%] (Sampling)
## Chain 3: Iteration: 40 / 50 [ 80%] (Sampling)
## Chain 3: Iteration: 45 / 50 [ 90%] (Sampling)
## Chain 3: Iteration: 50 / 50 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0 seconds (Warm-up)
## Chain 3: 0 seconds (Sampling)
## Chain 3: 0 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: WARNING: There aren't enough warmup iterations to fit the
## Chain 4: three stages of adaptation as currently configured.
## Chain 4: Reducing each adaptation stage to 15%/75%/10% of
## Chain 4: the given number of warmup iterations:
## Chain 4: init_buffer = 3
## Chain 4: adapt_window = 20
## Chain 4: term_buffer = 2
## Chain 4:
## Chain 4: Iteration: 1 / 50 [ 2%] (Warmup)
## Chain 4: Iteration: 5 / 50 [ 10%] (Warmup)
## Chain 4: Iteration: 10 / 50 [ 20%] (Warmup)
## Chain 4: Iteration: 15 / 50 [ 30%] (Warmup)
## Chain 4: Iteration: 20 / 50 [ 40%] (Warmup)
## Chain 4: Iteration: 25 / 50 [ 50%] (Warmup)
## Chain 4: Iteration: 26 / 50 [ 52%] (Sampling)
## Chain 4: Iteration: 30 / 50 [ 60%] (Sampling)
## Chain 4: Iteration: 35 / 50 [ 70%] (Sampling)
## Chain 4: Iteration: 40 / 50 [ 80%] (Sampling)
## Chain 4: Iteration: 45 / 50 [ 90%] (Sampling)
## Chain 4: Iteration: 50 / 50 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0 seconds (Warm-up)
## Chain 4: 0 seconds (Sampling)
## Chain 4: 0 seconds (Total)
## Chain 4:
print(fit_press_3.2)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: y ~ 1
## Data: df_3.1 (Number of observations: 100)
## Draws: 4 chains, each with iter = 50; warmup = 25; thin = 1;
## total post-warmup draws = 100
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 500.82 8.73 484.69 513.34 1.92 15 45
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 81.05 24.87 57.77 140.87 2.40 7 20
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_press_3.2)
Exercise 3.2 A simple linear model - B
# Install devtools to get bcogsci package where spacebar dataset is stored
suppressMessages(suppressWarnings(install.packages("devtools")))
##
## The downloaded binary packages are in
## /var/folders/5p/twfxkkzx5f7d9l7mg3nb0tc00000gn/T//RtmpZx1kex/downloaded_packages
# Install bcogsci from GitHub
devtools::install_github("bnicenboim/bcogsci")
# Load the bcogsci package
library(bcogsci)
data("df_spacebar")
head(df_spacebar)
## # A tibble: 6 × 2
## t trial
## <int> <int>
## 1 141 1
## 2 138 2
## 3 128 3
## 4 132 4
## 5 126 5
## 6 134 6
#checking the data to make prior assumption
summary(df_spacebar$t)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 110.0 156.0 166.0 168.6 181.0 409.0
sd(df_spacebar$t)
## [1] 24.9118
#sensitivity test to check the power of prior
prior_list_3.2 <- list(
baseline_3.2 = c(
prior(normal(170,50), class = Intercept),
prior(normal(20,20), class = sigma, lb = 0)),
tighter_3.2 = c(
prior(normal(170,50), class = Intercept),
prior(normal(20,10), class = sigma, lb = 0)),
looser_3.2 = c(
prior(normal(170,50), class = Intercept),
prior(normal(20,50), class = sigma, lb =0)),
weak_3.2 = c(
prior(normal(170,50), class = Intercept),
prior(normal(20,100), class = sigma, lb = 0)))
model_3.2b <- list() #for storing models
for (prior3.2 in names(prior_list_3.2)) {
cat("\nModel with prior:", prior3.2, "\n") # Print label before model summary
capture.output(model_3.2b[[prior3.2]] <- brm(
t ~ 1,
data = df_spacebar,
family = gaussian(),
prior = prior_list_3.2[[prior3.2]],
iter = 2000,
warmup = 1000,
chains = 4))
print(summary(model_3.2b[[prior3.2]])) # Print summary immediately after label
}
##
## Model with prior: baseline_3.2
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: t ~ 1
## Data: df_spacebar (Number of observations: 361)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 168.63 1.35 165.93 171.22 1.00 3391 2455
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 25.00 0.97 23.20 27.00 1.00 3524 2375
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
##
## Model with prior: tighter_3.2
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: t ~ 1
## Data: df_spacebar (Number of observations: 361)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 168.64 1.33 166.06 171.27 1.00 3325 2753
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 24.96 0.96 23.22 27.01 1.00 4140 2674
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
##
## Model with prior: looser_3.2
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: t ~ 1
## Data: df_spacebar (Number of observations: 361)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 168.61 1.31 165.99 171.18 1.00 3643 2943
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 25.00 0.94 23.22 27.01 1.00 3758 2519
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
##
## Model with prior: weak_3.2
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: t ~ 1
## Data: df_spacebar (Number of observations: 361)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 168.65 1.31 166.18 171.22 1.00 3234 2555
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 24.98 0.94 23.21 26.87 1.00 3413 2731
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
library(tidyverse)
#Analyse posterior distributions
#Extracting posterior samples from all models and storing in a data frame
df_post_3.2b <- bind_rows(lapply(names(model_3.2b), function(prior3.2){
post_samples_3.2b <- as_draws_df(model_3.2b[[prior3.2]])
data.frame(
mu = post_samples_3.2b$b_Intercept,
sigma = post_samples_3.2b$sigma,
Prior_set = prior3.2
)
}))
#Comparing posterior distributions with plots
#Mean tapping time
ggplot(df_post_3.2b, aes(x = mu, fill = Prior_set)) +
geom_density(alpha = 0.4) +
labs(title = "Comparison of Posterior Distributions for Mean Tapping Time", x = "Mean", y = "Density") +
theme_minimal()
#Variability in tapping time
ggplot(df_post_3.2b, aes(x = sigma, fill = Prior_set)) +
geom_density(alpha = 0.4) +
labs(title = "Comparison of Posterior Distributions for Variability in Tapping Time", x = "Standard deviation", y = "Density") +
theme_minimal()
Exercise 3.4 Posterior predictive checks with a log-normal model - point A
Fit the Log-Normal Model with (-2, 0.5)
fit_press_ln_3.4a <- brm(
t ~ 1,
data = df_spacebar,
family = lognormal(),
prior = c(
prior(normal(6, 1.5), class = Intercept),
prior(normal(-2, 0.5), class = sigma)
), chains = 4, iter = 2000, warmup = 1000, refresh = 0)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.0.40.1)’
## using SDK: ‘MacOSX14.0.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
#summary
summary(fit_press_ln_3.4a)
## Family: lognormal
## Links: mu = identity; sigma = identity
## Formula: t ~ 1
## Data: df_spacebar (Number of observations: 361)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 5.12 0.01 5.10 5.13 1.00 3531 2745
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.13 0.01 0.13 0.15 1.00 2604 2052
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#Extract samples
post_samples_3.4a <- as_draws_df(fit_press_ln_3.4a)
#Get mean and sd in log-scale
mean_intercept_3.4a <- mean(post_samples_3.4a$b_Intercept)
sd_intercept_3.4a <- sd(post_samples_3.4a$b_Intercept)
mean_sigma_3.4a <- mean(post_samples_3.4a$sigma)
sd_sigma_3.4a <- sd(post_samples_3.4a$sigma)
#convert mean and sd from log-scale to milisecond
mean_ln_3.4a <- exp(mean_intercept_3.4a + (mean_sigma_3.4a^2) / 2)
sd_ln_3.4a <- sqrt(exp(mean_sigma_3.4a ^ 2) - 1) * exp(2 * mean_intercept_3.4a + mean_sigma_3.4a^2)
#print
cat("\n\n")
cat("The estimated mean of model with LogNormal (-2, 0.5) is ", mean_ln_3.4a, "milliseconds \n")
## The estimated mean of model with LogNormal (-2, 0.5) is 168.5897 milliseconds
cat("The estimated sd of model with LogNormal (-2, 0.5) is ", sd_ln_3.4a, "milliseconds \n")
## The estimated sd of model with LogNormal (-2, 0.5) is 3851.331 milliseconds
Generate and Plot Prior Predictive Distributions
fit_prior_ln_3.4a <- brm(
t ~ 1,
data = df_spacebar,
family = lognormal(),
prior = c(
prior(normal(6, 1.5), class = Intercept),
prior(normal(-2, 0.5), class = sigma)
), chains = 4, iter = 2000, warmup = 1000, refresh = 0
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.0.40.1)’
## using SDK: ‘MacOSX14.0.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
#Plot
pp_check(fit_prior_ln_3.4a, ndraws = 100, type = "dens_overlay") +
ggtitle("(3.4 A) Prior Predictive Distribution with LogNormal (-2, 0.5) prior for Sigma")
Generate and Plot Prior Predictive Distributions from Previous Model in Exercise 3.2 B
#Generate prior predictive samples
fit_prior_baseline_3.2b <- brm(
t~ 1,
data = df_spacebar,
family = gaussian(),
prior = prior_list_3.2[["baseline_3.2"]],
sample_prior = "only",
chains = 4, iter = 2000, warmup = 1000, refresh = 0
)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.0.40.1)’
## using SDK: ‘MacOSX14.0.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
## ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
#Plot
pp_check(fit_prior_baseline_3.2b, ndraws = 100, type = "dens_overlay") +
ggtitle("(3.2 B) Prior Predictive Distribution with Normal (20, 20) prior for Sigma")