## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
##
## ar
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## - Windows 10 users: loo may be very slow if 'mc.cores' is set in your .Rprofile file (see https://github.com/stan-dev/loo/issues/94).
## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
##
## Attaching package: 'bayesplot'
## The following object is masked from 'package:brms':
##
## rhat
##
## rstan version 2.32.6 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
## This is rstantools version 2.4.0
##
## Attaching package: 'coda'
## The following object is masked from 'package:rstan':
##
## traceplot
## Installing package into 'U:/R/4.4.2'
## (as 'lib' is unspecified)
## package 'Hmisc' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\local_hfry\Temp\3\RtmpIHqoR0\downloaded_packages
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
## This is posterior version 1.6.0
##
## Attaching package: 'posterior'
## The following objects are masked from 'package:rstan':
##
## ess_bulk, ess_tail
## The following object is masked from 'package:bayesplot':
##
## rhat
## The following objects are masked from 'package:stats':
##
## mad, sd, var
## The following objects are masked from 'package:base':
##
## %in%, match
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
These are the most typical libraries you’ll need for Bayesian analyses.
library(brms)
library(loo)
library(bayesplot)
library(readxl)
library(ggplot2)
Making sure they are categorical variables.
complete_cancerlogreg <- read_xlsx("\\\\sscwin\\dfsroot\\users\\hfry\\Desktop\\Cancer Work Characteristics.xlsx")
str(complete_cancerlogreg)
## tibble [28 x 20] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:28] 1 2 3 4 5 6 7 8 9 10 ...
## $ Employment_Status_Num: num [1:28] 0 0 0 1 1 0 1 1 0 0 ...
## $ Job_Sat_Num : num [1:28] 1 0 1 0 0 0 0 0 1 0 ...
## $ Disclosure_All_Num : num [1:28] 1 1 1 0 0 0 0 0 0 1 ...
## $ Acc_Req_Rec : num [1:28] 1 1 1 1 1 2 1 1 1 1 ...
## $ Acc_Rec_Num : num [1:28] 1 1 0 0 0 0 0 0 0 0 ...
## $ Acc_Ask_Num : num [1:28] 1 1 0 0 0 1 0 0 0 0 ...
## $ Job_Perf_Num : num [1:28] 0 1 1 0 0 0 0 0 1 0 ...
## $ Job_Fit_Num : num [1:28] 0 1 1 0 0 0 0 0 1 0 ...
## $ Job_Fit_Num_2 : num [1:28] 0 1 1 0 1 1 0 0 1 1 ...
## $ Job_Sat_Num_2 : num [1:28] 1 1 1 0 1 0 1 1 1 1 ...
## $ Job_Perf_Num_2 : num [1:28] 1 1 1 0 1 1 0 0 1 1 ...
## $ Cancer_Type_Num : num [1:28] 3 5 5 1 1 5 3 5 3 4 ...
## $ Cancer_Stage_Num : num [1:28] 2 2 1 1 1 1 2 2 2 2 ...
## $ Age_Diag_Num : num [1:28] 8 27 32 21 18 20 26 36 59 78 ...
## $ Gender_Num : num [1:28] 1 1 1 1 1 1 1 2 2 1 ...
## $ Age : num [1:28] 43 33 33 28 21 26 35 39 61 79 ...
## $ Marital_Status_Num : num [1:28] 2 1 1 1 3 3 1 1 2 1 ...
## $ Income_Num : num [1:28] 1 3 5 3 6 1 3 3 1 2 ...
## $ Race_Num : num [1:28] 3 3 2 1 2 4 1 1 4 1 ...
# Convert all variables except Age and Age_Diag_Num to factors
complete_cancerlogreg <- complete_cancerlogreg %>%
mutate(across(-c(Age, Age_Diag_Num), as.factor))
# Check the structure to confirm the changes
str(complete_cancerlogreg)
## tibble [28 x 20] (S3: tbl_df/tbl/data.frame)
## $ ID : Factor w/ 28 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Employment_Status_Num: Factor w/ 2 levels "0","1": 1 1 1 2 2 1 2 2 1 1 ...
## $ Job_Sat_Num : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 2 1 ...
## $ Disclosure_All_Num : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 2 ...
## $ Acc_Req_Rec : Factor w/ 2 levels "1","2": 1 1 1 1 1 2 1 1 1 1 ...
## $ Acc_Rec_Num : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
## $ Acc_Ask_Num : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 1 1 1 ...
## $ Job_Perf_Num : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
## $ Job_Fit_Num : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
## $ Job_Fit_Num_2 : Factor w/ 2 levels "0","1": 1 2 2 1 2 2 1 1 2 2 ...
## $ Job_Sat_Num_2 : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 2 2 2 ...
## $ Job_Perf_Num_2 : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 1 1 2 2 ...
## $ Cancer_Type_Num : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 1 1 5 3 5 3 4 ...
## $ Cancer_Stage_Num : Factor w/ 2 levels "1","2": 2 2 1 1 1 1 2 2 2 2 ...
## $ Age_Diag_Num : num [1:28] 8 27 32 21 18 20 26 36 59 78 ...
## $ Gender_Num : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 2 2 1 ...
## $ Age : num [1:28] 43 33 33 28 21 26 35 39 61 79 ...
## $ Marital_Status_Num : Factor w/ 3 levels "1","2","3": 2 1 1 1 3 3 1 1 2 1 ...
## $ Income_Num : Factor w/ 6 levels "1","2","3","4",..: 1 3 5 3 6 1 3 3 1 2 ...
## $ Race_Num : Factor w/ 5 levels "1","2","3","4",..: 3 3 2 1 2 4 1 1 4 1 ...
# Set the reference category for all factors to 1
complete_cancerlogreg <- complete_cancerlogreg %>%
mutate(across(where(is.factor), ~ relevel(., ref = "1")))
# Check the structure to confirm the changes
str(complete_cancerlogreg)
## tibble [28 x 20] (S3: tbl_df/tbl/data.frame)
## $ ID : Factor w/ 28 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Employment_Status_Num: Factor w/ 2 levels "1","0": 2 2 2 1 1 2 1 1 2 2 ...
## $ Job_Sat_Num : Factor w/ 2 levels "1","0": 1 2 1 2 2 2 2 2 1 2 ...
## $ Disclosure_All_Num : Factor w/ 2 levels "1","0": 1 1 1 2 2 2 2 2 2 1 ...
## $ Acc_Req_Rec : Factor w/ 2 levels "1","2": 1 1 1 1 1 2 1 1 1 1 ...
## $ Acc_Rec_Num : Factor w/ 2 levels "1","0": 1 1 2 2 2 2 2 2 2 2 ...
## $ Acc_Ask_Num : Factor w/ 2 levels "1","0": 1 1 2 2 2 1 2 2 2 2 ...
## $ Job_Perf_Num : Factor w/ 2 levels "1","0": 2 1 1 2 2 2 2 2 1 2 ...
## $ Job_Fit_Num : Factor w/ 2 levels "1","0": 2 1 1 2 2 2 2 2 1 2 ...
## $ Job_Fit_Num_2 : Factor w/ 2 levels "1","0": 2 1 1 2 1 1 2 2 1 1 ...
## $ Job_Sat_Num_2 : Factor w/ 2 levels "1","0": 1 1 1 2 1 2 1 1 1 1 ...
## $ Job_Perf_Num_2 : Factor w/ 2 levels "1","0": 1 1 1 2 1 1 2 2 1 1 ...
## $ Cancer_Type_Num : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 1 1 5 3 5 3 4 ...
## $ Cancer_Stage_Num : Factor w/ 2 levels "1","2": 2 2 1 1 1 1 2 2 2 2 ...
## $ Age_Diag_Num : num [1:28] 8 27 32 21 18 20 26 36 59 78 ...
## $ Gender_Num : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 2 2 1 ...
## $ Age : num [1:28] 43 33 33 28 21 26 35 39 61 79 ...
## $ Marital_Status_Num : Factor w/ 3 levels "1","2","3": 2 1 1 1 3 3 1 1 2 1 ...
## $ Income_Num : Factor w/ 6 levels "1","2","3","4",..: 1 3 5 3 6 1 3 3 1 2 ...
## $ Race_Num : Factor w/ 5 levels "1","2","3","4",..: 3 3 2 1 2 4 1 1 4 1 ...
Now that the data is ready, let’s see how different priors impact the logistic regression model.
The Student-t prior distribution has heavier tails compared to the normal prior and can be more appealing when weakly informative priors are needed, but still provides some regularization. ### Specify the priors
informative_priors_student_t <- c(
prior(student_t(3, 0, 1), class = "Intercept"),
prior(student_t(3, 1, 2), class = "b", coef = "Job_Sat_Num0"), # Assuming positive effect
prior(student_t(3, -1, 2), class = "b", coef = "Disclosure_All_Num0"), # Assuming negative effect
prior(student_t(3, 0.5, 2), class = "b", coef = "Acc_Req_Rec2"), # Assuming positive effect
prior(student_t(3, 1.5, 2), class = "b", coef = "Job_Perf_Num0"), # Assuming strong positive effect
prior(student_t(3, 1, 2), class = "b", coef = "Job_Fit_Num0") # Assuming positive effect
)
baylogreginfstu <- brm(
formula = Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num +
Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num,
data = complete_cancerlogreg,
family = bernoulli(link = "logit"),
prior = informative_priors_student_t,
chains = 4,
iter = 20000,
warmup = 10000,
thin = 10,
seed = 1234
)
## Compiling Stan program...
## Start sampling
summary(baylogreginfstu)
## Family: bernoulli
## Links: mu = logit
## Formula: Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num
## Data: complete_cancerlogreg (Number of observations: 28)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 10;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.70 1.27 0.57 5.58 1.00 4387 3930
## Job_Sat_Num0 -1.77 1.35 -4.64 0.65 1.00 4138 3851
## Disclosure_All_Num0 -0.23 0.95 -2.13 1.66 1.00 3957 3752
## Acc_Req_Rec2 1.06 1.35 -1.52 3.78 1.00 3840 4124
## Job_Perf_Num0 -2.64 1.44 -5.77 -0.07 1.00 3814 3857
## Job_Fit_Num0 0.56 1.16 -1.58 2.93 1.00 3951 4058
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Quick note: these coefficients are in the logit metric. You can convert to odds ratio easily by doing 1 - exp(coefficient). For example, for job performance, the odds ratio would be 1 - exp(-2.01) = 0.866. In this context, this indicates that the odds of being employed are about 86.6% lower for those with excellent job performance compared to those without. (We’ll come back to this.)
But first, let’s look at the ones from the model summary.
One of the most important diagnostics is the potential scale reduction factor. If the ratio of these two sources of variance are equal to one, then this is evidence that the chains have converged. If the \(\hat{R}\)> 1.01 you should investigate.
These make up the measure of the effective sample size or n_eff. Higher values indicate better mixing and more reliable estimates. (How can you fix this if the estimates are too low? Try increasing more burn-in and post burn-in iterations. And take advantage of thinning.)
You might get a warning message in R saying “There were 22 divergent transitions after warmup.” The number isn’t important; the fact that you got the message is cause for concern. The divergent transitions are areas of the posterior distribution that were not explored at all by the algorithm, thus biasing your results. You will need to go back and check the data and your priors.
The trace plot should look like a “random scatter” around a horizontal line, indicating that the chain has reached a stationary distribution. There should be no apparent trends or drifts over time, it should be tight. The colors should be difficult to distinguish.
mcmc_trace(baylogreginfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
The posterior density plot the plot of the draws for each parameter in the model and for each chain. Strong deviation from normality (especially serious bi-modality) suggests issues with convergence of the parameter. (How can you fix this? more iterations, better priors, or both!)
mcmc_dens(baylogreginfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
These plots show us how quickly the draws from the posterior distribution became independent.Should be a very quick, steep drop-off, then remain steady.
mcmc_acf(baylogreginfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
These plots show us how close our observed data is to replicated data that is simulated from the posterior predictive distribution. The y and y_rep should closely match each other; if they do, that means our model is a good fit for the data
pp_check(baylogreginfstu, type = "bars", ndraws = 100)
We’ll look at this later when we compare all of the models side-by-side.
loo_baylogreginfstu <- loo(baylogreginfstu, moment_match = TRUE, reloo = TRUE)
## No problematic observations found. Returning the original 'loo' object.
print(loo_baylogreginfstu)
##
## Computed from 4000 by 28 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -18.6 4.2
## p_loo 5.8 1.8
## looic 37.3 8.5
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
weakly_informative_priors_student_t <- c(
prior(normal(0, 1), class = "Intercept"),
prior(student_t(3, 0, 2.5), class = "b", coef = "Acc_Req_Rec2"), # No assumptions about the effect
prior(student_t(3, 0, 2.5), class = "b", coef = "Disclosure_All_Num0"), # No assumptions about the effect
prior(student_t(3, 0, 2.5), class = "b", coef = "Job_Sat_Num0"), # No assumptions about the effect
prior(student_t(3, 0, 2.5), class = "b", coef = "Job_Perf_Num0"), # No assumptions about the effect
prior(student_t(3, 0, 2.5), class = "b", coef = "Job_Fit_Num0") # No assumptions about the effect
)
baylogregweakinfstu <- brm(
formula = Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num +
Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num,
data = complete_cancerlogreg,
family = bernoulli(link = "logit"),
prior = weakly_informative_priors_student_t,
chains = 4,
iter = 20000,
warmup = 10000,
thin = 10,
seed = 1234
)
## Compiling Stan program...
## Start sampling
summary(baylogregweakinfstu)
## Family: bernoulli
## Links: mu = logit
## Formula: Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num
## Data: complete_cancerlogreg (Number of observations: 28)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 10;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.00 1.31 0.69 5.78 1.00 4055 3954
## Job_Sat_Num0 -2.07 1.38 -5.08 0.44 1.00 4185 4059
## Disclosure_All_Num0 0.11 1.04 -1.92 2.21 1.00 3435 3763
## Acc_Req_Rec2 1.06 1.52 -1.89 4.12 1.00 3582 3740
## Job_Perf_Num0 -2.78 1.38 -5.81 -0.35 1.00 4041 4098
## Job_Fit_Num0 0.40 1.24 -1.93 2.98 1.00 4220 3959
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_trace(baylogregweakinfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
mcmc_dens(baylogregweakinfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
mcmc_acf(baylogregweakinfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
pp_check(baylogregweakinfstu, type = "bars", ndraws = 100)
loo_baylogregweakinfstu <- loo(baylogregweakinfstu, moment_match = TRUE, reloo = TRUE)
## No problematic observations found. Returning the original 'loo' object.
print(loo_baylogregweakinfstu)
##
## Computed from 4000 by 28 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -19.1 4.7
## p_loo 6.4 2.0
## looic 38.2 9.3
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
non_informative_priors_student_t <- c(
prior(student_t(3, 0, 10), class = "Intercept"),
prior(student_t(3, 0, 10), class = "b", coef = "Job_Sat_Num0"),
prior(student_t(3, 0, 10), class = "b", coef = "Disclosure_All_Num0"),
prior(student_t(3, 0, 10), class = "b", coef = "Acc_Req_Rec2"),
prior(student_t(3, 0, 10), class = "b", coef = "Job_Perf_Num0"),
prior(student_t(3, 0, 10), class = "b", coef = "Job_Fit_Num0")
)
baylogregnoninfstu <- brm(
formula = Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num +
Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num,
data = complete_cancerlogreg,
family = bernoulli(link = "logit"),
prior = non_informative_priors_student_t,
chains = 4,
iter = 20000,
warmup = 10000,
thin = 10,
seed = 1234
)
## Compiling Stan program...
## Start sampling
summary(baylogregnoninfstu)
## Family: bernoulli
## Links: mu = logit
## Formula: Employment_Status_Num ~ Job_Sat_Num + Disclosure_All_Num + Acc_Req_Rec + Job_Perf_Num + Job_Fit_Num
## Data: complete_cancerlogreg (Number of observations: 28)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 10;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 4.20 1.94 1.08 8.45 1.00 3957 3779
## Job_Sat_Num0 -3.09 1.83 -6.94 0.18 1.00 4084 3961
## Disclosure_All_Num0 0.30 1.27 -2.12 2.94 1.00 3607 3692
## Acc_Req_Rec2 1.59 1.87 -2.10 5.29 1.00 3802 3852
## Job_Perf_Num0 -4.04 1.89 -8.19 -0.86 1.00 3839 4054
## Job_Fit_Num0 1.15 1.74 -1.91 5.01 1.00 3911 3585
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_trace(baylogregnoninfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
mcmc_dens(baylogregnoninfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
mcmc_acf(baylogregnoninfstu, pars = c("b_Job_Sat_Num0",
"b_Disclosure_All_Num0",
"b_Acc_Req_Rec2",
"b_Job_Perf_Num0",
"b_Job_Fit_Num0"))
pp_check(baylogregnoninfstu, type = "bars", ndraws = 100)
loo_baylogregnoninfstu <- loo(baylogregnoninfstu, moment_match = TRUE, reloo = TRUE)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
## 1 problematic observation(s) found.
## The model will be refit 1 times.
##
## Fitting model 1 out of 1 (leaving out observation 26)
## Start sampling
print(loo_baylogregnoninfstu)
##
## Computed from 4000 by 28 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -25.4 8.8
## p_loo 12.9 5.6
## looic 50.8 17.6
## ------
## MCSE of elpd_loo is 0.2.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
There are a couple things to pay attention to:
elpd_loo (or expected lob predictive density): Higher elpd_loo values indicate better predictive accuracy.
p_loo (or effective number of parameters): Lower p_loo values indicate a simpler (more parsimonious) model with fewer effective parameters.
looic (or LOO information criterion): Lower looic values indicate better model fit.
MCSE (or Monte Carlo standard error): Low MCSE indicate high precision of the elpd_loo estimate. If it says NA that can indicate issues with the MCMC sampling.
Pareto k diagnostic values: These values assess the reliability of the loo estimates by identifying observations that may have a disproportionate influence on the loo estimates. Good Pareto k estimates are k < 0.7, bad estimates are 0.7 k < 1; and very bad estimates are k > 1.
loo_results <- loo(baylogregnoninfstu, baylogregweakinfstu, baylogreginfstu, moment_match = TRUE, reloo = TRUE)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
## 1 problematic observation(s) found.
## The model will be refit 1 times.
##
## Fitting model 1 out of 1 (leaving out observation 26)
## Start sampling
## No problematic observations found. Returning the original 'loo' object.
## No problematic observations found. Returning the original 'loo' object.
print(loo_results)
## Output of model 'baylogregnoninfstu':
##
## Computed from 4000 by 28 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -25.2 8.6
## p_loo 12.8 5.5
## looic 50.4 17.3
## ------
## MCSE of elpd_loo is 0.2.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'baylogregweakinfstu':
##
## Computed from 4000 by 28 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -19.1 4.7
## p_loo 6.4 2.0
## looic 38.2 9.3
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'baylogreginfstu':
##
## Computed from 4000 by 28 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -18.6 4.2
## p_loo 5.8 1.8
## looic 37.3 8.5
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## baylogreginfstu 0.0 0.0
## baylogregweakinfstu -0.5 0.6
## baylogregnoninfstu -6.6 4.6
This is a measure of how well the model explains the variability in the observed data, sound familiar? The higher the value, the larger the variance in the dependent variable is explained by the model, suggesting a good fit. These come with the point estimate and the 95% credible interval. The smaller the credible interval, the more precise (certain) the estimate; the larger the interval, the less precise (more uncertain) the estimate.
r2_noninfstu <- bayes_R2(baylogregnoninfstu)
r2_weakinfstu <- bayes_R2(baylogregweakinfstu)
r2_infstu <- bayes_R2(baylogreginfstu)
Print bayesian r-squared values
print(r2_noninfstu)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.4490164 0.07914935 0.2602134 0.5788573
print(r2_weakinfstu)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.3929171 0.08745205 0.1957688 0.5309543
print(r2_infstu)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.3706217 0.09261548 0.1638938 0.5205366