## Loading required package: Rcpp

## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').

## 
## Attaching package: 'brms'

## The following object is masked from 'package:stats':
## 
##     ar

## This is loo version 2.8.0

## - Online documentation and vignettes at mc-stan.org/loo

## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.

## - Windows 10 users: loo may be very slow if 'mc.cores' is set in your .Rprofile file (see https://github.com/stan-dev/loo/issues/94).

## This is bayesplot version 1.11.1

## - Online documentation and vignettes at mc-stan.org/bayesplot

## - bayesplot theme set to bayesplot::theme_default()

##    * Does _not_ affect other ggplot2 plots

##    * See ?bayesplot_theme_set for details on theme setting

## 
## Attaching package: 'bayesplot'

## The following object is masked from 'package:brms':
## 
##     rhat

## 
## rstan version 2.32.6 (Stan version 2.32.2)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)

## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file

## This is rstantools version 2.4.0

## 
## Attaching package: 'coda'

## The following object is masked from 'package:rstan':
## 
##     traceplot

## Installing package into 'U:/R/4.4.2'
## (as 'lib' is unspecified)

## package 'Hmisc' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\local_hfry\Temp\3\RtmpIHqoR0\downloaded_packages

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

## This is posterior version 1.6.0

## 
## Attaching package: 'posterior'

## The following objects are masked from 'package:rstan':
## 
##     ess_bulk, ess_tail

## The following object is masked from 'package:bayesplot':
## 
##     rhat

## The following objects are masked from 'package:stats':
## 
##     mad, sd, var

## The following objects are masked from 'package:base':
## 
##     %in%, match

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Load libraries

These are the most typical libraries you’ll need for Bayesian analyses.

library(brms)
library(loo)
library(bayesplot)
library(readxl)
library(ggplot2)

Read in and check data

Making sure they are categorical variables.

complete_cancerlogreg <- read_xlsx("\\\\sscwin\\dfsroot\\users\\hfry\\Desktop\\Cancer Work Characteristics.xlsx")

str(complete_cancerlogreg)

## tibble [28 x 20] (S3: tbl_df/tbl/data.frame)
##  $ ID                   : num [1:28] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Employment_Status_Num: num [1:28] 0 0 0 1 1 0 1 1 0 0 ...
##  $ Job_Sat_Num          : num [1:28] 1 0 1 0 0 0 0 0 1 0 ...
##  $ Disclosure_All_Num   : num [1:28] 1 1 1 0 0 0 0 0 0 1 ...
##  $ Acc_Req_Rec          : num [1:28] 1 1 1 1 1 2 1 1 1 1 ...
##  $ Acc_Rec_Num          : num [1:28] 1 1 0 0 0 0 0 0 0 0 ...
##  $ Acc_Ask_Num          : num [1:28] 1 1 0 0 0 1 0 0 0 0 ...
##  $ Job_Perf_Num         : num [1:28] 0 1 1 0 0 0 0 0 1 0 ...
##  $ Job_Fit_Num          : num [1:28] 0 1 1 0 0 0 0 0 1 0 ...
##  $ Job_Fit_Num_2        : num [1:28] 0 1 1 0 1 1 0 0 1 1 ...
##  $ Job_Sat_Num_2        : num [1:28] 1 1 1 0 1 0 1 1 1 1 ...
##  $ Job_Perf_Num_2       : num [1:28] 1 1 1 0 1 1 0 0 1 1 ...
##  $ Cancer_Type_Num      : num [1:28] 3 5 5 1 1 5 3 5 3 4 ...
##  $ Cancer_Stage_Num     : num [1:28] 2 2 1 1 1 1 2 2 2 2 ...
##  $ Age_Diag_Num         : num [1:28] 8 27 32 21 18 20 26 36 59 78 ...
##  $ Gender_Num           : num [1:28] 1 1 1 1 1 1 1 2 2 1 ...
##  $ Age                  : num [1:28] 43 33 33 28 21 26 35 39 61 79 ...
##  $ Marital_Status_Num   : num [1:28] 2 1 1 1 3 3 1 1 2 1 ...
##  $ Income_Num           : num [1:28] 1 3 5 3 6 1 3 3 1 2 ...
##  $ Race_Num             : num [1:28] 3 3 2 1 2 4 1 1 4 1 ...

# Convert all variables except Age and Age_Diag_Num to factors
complete_cancerlogreg <- complete_cancerlogreg %>%
  mutate(across(-c(Age, Age_Diag_Num), as.factor))

# Check the structure to confirm the changes
str(complete_cancerlogreg)

## tibble [28 x 20] (S3: tbl_df/tbl/data.frame)
##  $ ID                   : Factor w/ 28 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Employment_Status_Num: Factor w/ 2 levels "0","1": 1 1 1 2 2 1 2 2 1 1 ...
##  $ Job_Sat_Num          : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 2 1 ...
##  $ Disclosure_All_Num   : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 2 ...
##  $ Acc_Req_Rec          : Factor w/ 2 levels "1","2": 1 1 1 1 1 2 1 1 1 1 ...
##  $ Acc_Rec_Num          : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
##  $ Acc_Ask_Num          : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 1 1 1 ...
##  $ Job_Perf_Num         : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
##  $ Job_Fit_Num          : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
##  $ Job_Fit_Num_2        : Factor w/ 2 levels "0","1": 1 2 2 1 2 2 1 1 2 2 ...
##  $ Job_Sat_Num_2        : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 2 2 2 ...
##  $ Job_Perf_Num_2       : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 1 1 2 2 ...
##  $ Cancer_Type_Num      : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 1 1 5 3 5 3 4 ...
##  $ Cancer_Stage_Num     : Factor w/ 2 levels "1","2": 2 2 1 1 1 1 2 2 2 2 ...
##  $ Age_Diag_Num         : num [1:28] 8 27 32 21 18 20 26 36 59 78 ...
##  $ Gender_Num           : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 2 2 1 ...
##  $ Age                  : num [1:28] 43 33 33 28 21 26 35 39 61 79 ...
##  $ Marital_Status_Num   : Factor w/ 3 levels "1","2","3": 2 1 1 1 3 3 1 1 2 1 ...
##  $ Income_Num           : Factor w/ 6 levels "1","2","3","4",..: 1 3 5 3 6 1 3 3 1 2 ...
##  $ Race_Num             : Factor w/ 5 levels "1","2","3","4",..: 3 3 2 1 2 4 1 1 4 1 ...

# Set the reference category for all factors to 1
complete_cancerlogreg <- complete_cancerlogreg %>%
  mutate(across(where(is.factor), ~ relevel(., ref = "1")))

# Check the structure to confirm the changes
str(complete_cancerlogreg)

## tibble [28 x 20] (S3: tbl_df/tbl/data.frame)
##  $ ID                   : Factor w/ 28 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Employment_Status_Num: Factor w/ 2 levels "1","0": 2 2 2 1 1 2 1 1 2 2 ...
##  $ Job_Sat_Num          : Factor w/ 2 levels "1","0": 1 2 1 2 2 2 2 2 1 2 ...
##  $ Disclosure_All_Num   : Factor w/ 2 levels "1","0": 1 1 1 2 2 2 2 2 2 1 ...
##  $ Acc_Req_Rec          : Factor w/ 2 levels "1","2": 1 1 1 1 1 2 1 1 1 1 ...
##  $ Acc_Rec_Num          : Factor w/ 2 levels "1","0": 1 1 2 2 2 2 2 2 2 2 ...
##  $ Acc_Ask_Num          : Factor w/ 2 levels "1","0": 1 1 2 2 2 1 2 2 2 2 ...
##  $ Job_Perf_Num         : Factor w/ 2 levels "1","0": 2 1 1 2 2 2 2 2 1 2 ...
##  $ Job_Fit_Num          : Factor w/ 2 levels "1","0": 2 1 1 2 2 2 2 2 1 2 ...
##  $ Job_Fit_Num_2        : Factor w/ 2 levels "1","0": 2 1 1 2 1 1 2 2 1 1 ...
##  $ Job_Sat_Num_2        : Factor w/ 2 levels "1","0": 1 1 1 2 1 2 1 1 1 1 ...
##  $ Job_Perf_Num_2       : Factor w/ 2 levels "1","0": 1 1 1 2 1 1 2 2 1 1 ...
##  $ Cancer_Type_Num      : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 1 1 5 3 5 3 4 ...
##  $ Cancer_Stage_Num     : Factor w/ 2 levels "1","2": 2 2 1 1 1 1 2 2 2 2 ...
##  $ Age_Diag_Num         : num [1:28] 8 27 32 21 18 20 26 36 59 78 ...
##  $ Gender_Num           : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 2 2 1 ...
##  $ Age                  : num [1:28] 43 33 33 28 21 26 35 39 61 79 ...
##  $ Marital_Status_Num   : Factor w/ 3 levels "1","2","3": 2 1 1 1 3 3 1 1 2 1 ...
##  $ Income_Num           : Factor w/ 6 levels "1","2","3","4",..: 1 3 5 3 6 1 3 3 1 2 ...
##  $ Race_Num             : Factor w/ 5 levels "1","2","3","4",..: 3 3 2 1 2 4 1 1 4 1 ...

Now that the data is ready, let’s see how different priors impact the logistic regression model.

Prior sensitivity checks

Model with INFORMATIVE PRIORS from student-t distribution

The Student-t prior distribution has heavier tails compared to the normal prior and can be more appealing when weakly informative priors are needed, but still provides some regularization. ### Specify the priors

informative_priors_student_t <- c(
  prior(student_t(3, 0, 1), class = "Intercept"),                        
  prior(student_t(3, 1, 2), class = "b", coef = "Job_Sat_Num0"),         # Assuming positive effect
  prior(student_t(3, -1, 2), class = "b", coef = "Disclosure_All_Num0"), # Assuming negative effect
  prior(student_t(3, 0.5, 2), class = "b", coef = "Acc_Req_Rec2"),       # Assuming positive effect
  prior(student_t(3, 1.5, 2), class = "b", coef = "Job_Perf_Num0"),      # Assuming strong positive effect
  prior(student_t(3, 1, 2), class = "b", coef = "Job_Fit_Num0")          # Assuming positive effect
)

Fit model using brms

baylogreginfstu <- brm(
  formula = Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + 
    Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num,
  data = complete_cancerlogreg,
  family = bernoulli(link = "logit"),
  prior = informative_priors_student_t,
  chains = 4,
  iter = 20000,
  warmup = 10000,
  thin = 10,
  seed = 1234
)

## Compiling Stan program...

## Start sampling

Display model summary

summary(baylogreginfstu)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num 
##    Data: complete_cancerlogreg (Number of observations: 28) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 10;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept               2.70      1.27     0.57     5.58 1.00     4387     3930
## Job_Sat_Num0           -1.77      1.35    -4.64     0.65 1.00     4138     3851
## Disclosure_All_Num0    -0.23      0.95    -2.13     1.66 1.00     3957     3752
## Acc_Req_Rec2            1.06      1.35    -1.52     3.78 1.00     3840     4124
## Job_Perf_Num0          -2.64      1.44    -5.77    -0.07 1.00     3814     3857
## Job_Fit_Num0            0.56      1.16    -1.58     2.93 1.00     3951     4058
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Quick note: these coefficients are in the logit metric. You can convert to odds ratio easily by doing 1 - exp(coefficient). For example, for job performance, the odds ratio would be 1 - exp(-2.01) = 0.866. In this context, this indicates that the odds of being employed are about 86.6% lower for those with excellent job performance compared to those without. (We’ll come back to this.)

Display convergence diagnostics

But first, let’s look at the ones from the model summary.

\(\hat{R}\)

One of the most important diagnostics is the potential scale reduction factor. If the ratio of these two sources of variance are equal to one, then this is evidence that the chains have converged. If the \(\hat{R}\)> 1.01 you should investigate.

Bulk_ESS and Tail_ESS

These make up the measure of the effective sample size or n_eff. Higher values indicate better mixing and more reliable estimates. (How can you fix this if the estimates are too low? Try increasing more burn-in and post burn-in iterations. And take advantage of thinning.)

Divergent transitions in HMC

You might get a warning message in R saying “There were 22 divergent transitions after warmup.” The number isn’t important; the fact that you got the message is cause for concern. The divergent transitions are areas of the posterior distribution that were not explored at all by the algorithm, thus biasing your results. You will need to go back and check the data and your priors.

Trace plots

The trace plot should look like a “random scatter” around a horizontal line, indicating that the chain has reached a stationary distribution. There should be no apparent trends or drifts over time, it should be tight. The colors should be difficult to distinguish.

mcmc_trace(baylogreginfstu, pars = c("b_Job_Sat_Num0", 
                                     "b_Disclosure_All_Num0", 
                                     "b_Acc_Req_Rec2", 
                                     "b_Job_Perf_Num0", 
                                     "b_Job_Fit_Num0"))

Density plots

The posterior density plot the plot of the draws for each parameter in the model and for each chain. Strong deviation from normality (especially serious bi-modality) suggests issues with convergence of the parameter. (How can you fix this? more iterations, better priors, or both!)

mcmc_dens(baylogreginfstu, pars = c("b_Job_Sat_Num0", 
                                    "b_Disclosure_All_Num0", 
                                    "b_Acc_Req_Rec2", 
                                    "b_Job_Perf_Num0", 
                                    "b_Job_Fit_Num0"))

Autocorrelation plots

These plots show us how quickly the draws from the posterior distribution became independent.Should be a very quick, steep drop-off, then remain steady.

mcmc_acf(baylogreginfstu, pars = c("b_Job_Sat_Num0", 
                                   "b_Disclosure_All_Num0", 
                                   "b_Acc_Req_Rec2", 
                                   "b_Job_Perf_Num0", 
                                   "b_Job_Fit_Num0"))

Posterior predictive checks

These plots show us how close our observed data is to replicated data that is simulated from the posterior predictive distribution. The y and y_rep should closely match each other; if they do, that means our model is a good fit for the data

pp_check(baylogreginfstu, type = "bars", ndraws = 100)

Model comparison using LOO and WAIC

We’ll look at this later when we compare all of the models side-by-side.

loo_baylogreginfstu <- loo(baylogreginfstu, moment_match = TRUE, reloo = TRUE)

## No problematic observations found. Returning the original 'loo' object.

print(loo_baylogreginfstu)

## 
## Computed from 4000 by 28 log-likelihood matrix.
## 
##          Estimate  SE
## elpd_loo    -18.6 4.2
## p_loo         5.8 1.8
## looic        37.3 8.5
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Model with WEAKLY INFORMATIVE priors from student-t distribution

Specify the priors

weakly_informative_priors_student_t <- c(
  prior(normal(0, 1), class = "Intercept"),
  prior(student_t(3, 0, 2.5), class = "b", coef = "Acc_Req_Rec2"),        # No assumptions about the effect
  prior(student_t(3, 0, 2.5), class = "b", coef = "Disclosure_All_Num0"), # No assumptions about the effect
  prior(student_t(3, 0, 2.5), class = "b", coef = "Job_Sat_Num0"),        # No assumptions about the effect
  prior(student_t(3, 0, 2.5), class = "b", coef = "Job_Perf_Num0"),       # No assumptions about the effect
  prior(student_t(3, 0, 2.5), class = "b", coef = "Job_Fit_Num0")         # No assumptions about the effect
)

Fit model using brms

baylogregweakinfstu <- brm(
  formula = Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + 
    Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num,
  data = complete_cancerlogreg,
  family = bernoulli(link = "logit"),
  prior = weakly_informative_priors_student_t,
  chains = 4,
  iter = 20000,
  warmup = 10000,
  thin = 10,
  seed = 1234
)

## Compiling Stan program...

## Start sampling

Display model summary

summary(baylogregweakinfstu)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num 
##    Data: complete_cancerlogreg (Number of observations: 28) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 10;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept               3.00      1.31     0.69     5.78 1.00     4055     3954
## Job_Sat_Num0           -2.07      1.38    -5.08     0.44 1.00     4185     4059
## Disclosure_All_Num0     0.11      1.04    -1.92     2.21 1.00     3435     3763
## Acc_Req_Rec2            1.06      1.52    -1.89     4.12 1.00     3582     3740
## Job_Perf_Num0          -2.78      1.38    -5.81    -0.35 1.00     4041     4098
## Job_Fit_Num0            0.40      1.24    -1.93     2.98 1.00     4220     3959
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Display convergence diagnostics

Trace plots

mcmc_trace(baylogregweakinfstu, pars = c("b_Job_Sat_Num0", 
                                     "b_Disclosure_All_Num0", 
                                     "b_Acc_Req_Rec2", 
                                     "b_Job_Perf_Num0", 
                                     "b_Job_Fit_Num0"))

Density plots

mcmc_dens(baylogregweakinfstu, pars = c("b_Job_Sat_Num0", 
                                    "b_Disclosure_All_Num0", 
                                    "b_Acc_Req_Rec2", 
                                    "b_Job_Perf_Num0", 
                                    "b_Job_Fit_Num0"))

Autocorrelation plots

mcmc_acf(baylogregweakinfstu, pars = c("b_Job_Sat_Num0", 
                                   "b_Disclosure_All_Num0", 
                                   "b_Acc_Req_Rec2", 
                                   "b_Job_Perf_Num0", 
                                   "b_Job_Fit_Num0"))

Posterior predictive checks

pp_check(baylogregweakinfstu, type = "bars", ndraws = 100)

Model comparison using LOO and WAIC

loo_baylogregweakinfstu <- loo(baylogregweakinfstu, moment_match = TRUE, reloo = TRUE)

## No problematic observations found. Returning the original 'loo' object.

print(loo_baylogregweakinfstu)

## 
## Computed from 4000 by 28 log-likelihood matrix.
## 
##          Estimate  SE
## elpd_loo    -19.1 4.7
## p_loo         6.4 2.0
## looic        38.2 9.3
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Model with NON-INFORMATIVE priors from student-t distribution

Specify the priors

non_informative_priors_student_t <- c(
  prior(student_t(3, 0, 10), class = "Intercept"),
  prior(student_t(3, 0, 10), class = "b", coef = "Job_Sat_Num0"),
  prior(student_t(3, 0, 10), class = "b", coef = "Disclosure_All_Num0"),
  prior(student_t(3, 0, 10), class = "b", coef = "Acc_Req_Rec2"),
  prior(student_t(3, 0, 10), class = "b", coef = "Job_Perf_Num0"),
  prior(student_t(3, 0, 10), class = "b", coef = "Job_Fit_Num0")
)

Fit model using brms

baylogregnoninfstu <- brm(
  formula = Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + 
    Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num,
  data = complete_cancerlogreg,
  family = bernoulli(link = "logit"),
  prior = non_informative_priors_student_t,
  chains = 4,
  iter = 20000,
  warmup = 10000,
  thin = 10,
  seed = 1234
)

## Compiling Stan program...

## Start sampling

Display model summary

summary(baylogregnoninfstu)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num 
##    Data: complete_cancerlogreg (Number of observations: 28) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 10;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept               4.20      1.94     1.08     8.45 1.00     3957     3779
## Job_Sat_Num0           -3.09      1.83    -6.94     0.18 1.00     4084     3961
## Disclosure_All_Num0     0.30      1.27    -2.12     2.94 1.00     3607     3692
## Acc_Req_Rec2            1.59      1.87    -2.10     5.29 1.00     3802     3852
## Job_Perf_Num0          -4.04      1.89    -8.19    -0.86 1.00     3839     4054
## Job_Fit_Num0            1.15      1.74    -1.91     5.01 1.00     3911     3585
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Display convergence diagnostics

Trace plots

mcmc_trace(baylogregnoninfstu, pars = c("b_Job_Sat_Num0", 
                                     "b_Disclosure_All_Num0", 
                                     "b_Acc_Req_Rec2", 
                                     "b_Job_Perf_Num0", 
                                     "b_Job_Fit_Num0"))

Density plots

mcmc_dens(baylogregnoninfstu, pars = c("b_Job_Sat_Num0", 
                                    "b_Disclosure_All_Num0", 
                                    "b_Acc_Req_Rec2", 
                                    "b_Job_Perf_Num0", 
                                    "b_Job_Fit_Num0"))

Autocorrelation plots

mcmc_acf(baylogregnoninfstu, pars = c("b_Job_Sat_Num0", 
                                   "b_Disclosure_All_Num0", 
                                   "b_Acc_Req_Rec2", 
                                   "b_Job_Perf_Num0", 
                                   "b_Job_Fit_Num0"))

Posterior predictive checks

pp_check(baylogregnoninfstu, type = "bars", ndraws = 100)

Model comparison using LOO and WAIC

loo_baylogregnoninfstu <- loo(baylogregnoninfstu, moment_match = TRUE, reloo = TRUE)

## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.

## 1 problematic observation(s) found.
## The model will be refit 1 times.

## 
## Fitting model 1 out of 1 (leaving out observation 26)

## Start sampling

print(loo_baylogregnoninfstu)

## 
## Computed from 4000 by 28 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo    -25.4  8.8
## p_loo        12.9  5.6
## looic        50.8 17.6
## ------
## MCSE of elpd_loo is 0.2.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Side-by-side comparison of models

Compare models using LOO

There are a couple things to pay attention to:

elpd_loo (or expected lob predictive density): Higher elpd_loo values indicate better predictive accuracy.
p_loo (or effective number of parameters): Lower p_loo values indicate a simpler (more parsimonious) model with fewer effective parameters.
looic (or LOO information criterion): Lower looic values indicate better model fit.
MCSE (or Monte Carlo standard error): Low MCSE indicate high precision of the elpd_loo estimate. If it says NA that can indicate issues with the MCMC sampling.
Pareto k diagnostic values: These values assess the reliability of the loo estimates by identifying observations that may have a disproportionate influence on the loo estimates. Good Pareto k estimates are k < 0.7, bad estimates are 0.7 k < 1; and very bad estimates are k > 1.

loo_results <- loo(baylogregnoninfstu, baylogregweakinfstu, baylogreginfstu, moment_match = TRUE, reloo = TRUE)

## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.

## 1 problematic observation(s) found.
## The model will be refit 1 times.

## 
## Fitting model 1 out of 1 (leaving out observation 26)

## Start sampling

## No problematic observations found. Returning the original 'loo' object.
## No problematic observations found. Returning the original 'loo' object.

print(loo_results)

## Output of model 'baylogregnoninfstu':
## 
## Computed from 4000 by 28 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo    -25.2  8.6
## p_loo        12.8  5.5
## looic        50.4 17.3
## ------
## MCSE of elpd_loo is 0.2.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'baylogregweakinfstu':
## 
## Computed from 4000 by 28 log-likelihood matrix.
## 
##          Estimate  SE
## elpd_loo    -19.1 4.7
## p_loo         6.4 2.0
## looic        38.2 9.3
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'baylogreginfstu':
## 
## Computed from 4000 by 28 log-likelihood matrix.
## 
##          Estimate  SE
## elpd_loo    -18.6 4.2
## p_loo         5.8 1.8
## looic        37.3 8.5
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##                     elpd_diff se_diff
## baylogreginfstu      0.0       0.0   
## baylogregweakinfstu -0.5       0.6   
## baylogregnoninfstu  -6.6       4.6

Compare models using bayesian r-squared

This is a measure of how well the model explains the variability in the observed data, sound familiar? The higher the value, the larger the variance in the dependent variable is explained by the model, suggesting a good fit. These come with the point estimate and the 95% credible interval. The smaller the credible interval, the more precise (certain) the estimate; the larger the interval, the less precise (more uncertain) the estimate.

r2_noninfstu <- bayes_R2(baylogregnoninfstu)
r2_weakinfstu <- bayes_R2(baylogregweakinfstu)
r2_infstu <- bayes_R2(baylogreginfstu)

Print bayesian r-squared values

print(r2_noninfstu)

##     Estimate  Est.Error      Q2.5     Q97.5
## R2 0.4490164 0.07914935 0.2602134 0.5788573

print(r2_weakinfstu)

##     Estimate  Est.Error      Q2.5     Q97.5
## R2 0.3929171 0.08745205 0.1957688 0.5309543

print(r2_infstu)

##     Estimate  Est.Error      Q2.5     Q97.5
## R2 0.3706217 0.09261548 0.1638938 0.5205366

ARRPE Conference 2025 Exercise

Hannah Fry

2025-02-20

Load libraries

Read in and check data

Prior sensitivity checks

Model with INFORMATIVE PRIORS from student-t distribution

Fit model using brms

Display model summary

Display convergence diagnostics

\(\hat{R}\)

Bulk_ESS and Tail_ESS

Divergent transitions in HMC

Trace plots

Density plots

Autocorrelation plots

Posterior predictive checks

Model comparison using LOO and WAIC

Model with WEAKLY INFORMATIVE priors from student-t distribution

Specify the priors

Fit model using brms

Display model summary

Display convergence diagnostics

Trace plots

Density plots

Autocorrelation plots

Posterior predictive checks

Model comparison using LOO and WAIC

Model with NON-INFORMATIVE priors from student-t distribution

Specify the priors

Fit model using brms

Display model summary

Display convergence diagnostics

Trace plots

Density plots

Autocorrelation plots

Posterior predictive checks

Model comparison using LOO and WAIC

Side-by-side comparison of models

Compare models using LOO

Compare models using bayesian r-squared