library(tidyverse)
## devtools::install_github('jburos/biostan', build_vignettes = TRUE, dependencies = TRUE)
library(rstan)
## To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)
##
library(bayesplot)
##
library(loo)
## Seed from random.org
set.seed(673788956)
This dataset contains information on the number of days absent from school for each student along with the student’s math test score and the type of program that the student belonged to.
data1 <- haven::read_dta("https://stats.idre.ucla.edu/stat/stata/dae/nb_data.dta")
data1 <- data1 %>%
mutate(prog = factor(prog, levels = 1:3, labels = c("General", "Academic", "Vocational")),
id = factor(id))
data1
## # A tibble: 314 x 5
## id gender math daysabs prog
## <fct> <dbl+lbl> <dbl> <dbl> <fct>
## 1 1001 2 63 4 Academic
## 2 1002 2 27 4 Academic
## 3 1003 1 20 2 Academic
## 4 1004 1 16 3 Academic
## 5 1005 1 2 3 Academic
## 6 1006 1 71 13 Academic
## 7 1007 1 63 11 Academic
## 8 1008 2 3 7 Academic
## 9 1009 2 51 10 Academic
## 10 1010 2 49 9 Vocational
## # ... with 304 more rows
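Before modeling, it is worth noting the share of exact zeros in the outcome, since that motivates the zero-inflated model considered later. A minimal sketch using the already loaded data1:
## Proportion of students with zero days absent (57 of 314 according to the count table below).
mean(data1$daysabs == 0)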
## For counting
data1_count <- data1 %>%
rename(y = daysabs) %>%
count(y)
data1_count
## # A tibble: 30 x 2
## y n
## <dbl> <int>
## 1 0 57
## 2 1 41
## 3 2 28
## 4 3 27
## 5 4 25
## 6 5 20
## 7 6 16
## 8 7 14
## 9 8 10
## 10 9 13
## # ... with 20 more rows
The distribution of days absent is widely spread and right-skewed, with a mode at zero and a long right tail.
data1 %>%
ggplot(mapping = aes(x = daysabs, fill = prog)) +
geom_bar(stat = "count") +
facet_grid(prog ~ ., margin = TRUE, scales = "free_y") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
A rough within-program comparison of means and variances indicates overdispersion, although this check does not account for the continuous math test score.
data1 %>%
group_by(prog) %>%
summarize(mean = mean(daysabs),
var = var(daysabs))
## # A tibble: 3 x 3
## prog mean var
## <fct> <dbl> <dbl>
## 1 General 10.6 67.3
## 2 Academic 6.93 55.4
## 3 Vocational 2.67 13.9
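A covariate-adjusted alternative is the quasi-Poisson dispersion estimate, which adjusts for both math and prog; a value well above 1 again points to overdispersion. This is a quick sketch, not part of the original analysis, and fit_qp is a hypothetical name introduced only here.
## Quasi-Poisson fit; summary()$dispersion is the estimated dispersion parameter.
fit_qp <- glm(daysabs ~ math + prog, data = data1, family = quasipoisson(link = "log"))
summary(fit_qp)$dispersion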
We will assume the design matrix from the UCLA IDRE (DAE) example is sufficient for modeling the covariates. rstan requires a numeric design matrix, which we construct with model.matrix().
X <- model.matrix(object = daysabs ~ math + prog, data = data1)
head(X, n = 10)
## (Intercept) math progAcademic progVocational
## 1 1 63 1 0
## 2 1 27 1 0
## 3 1 20 1 0
## 4 1 16 1 0
## 5 1 2 1 0
## 6 1 71 1 0
## 7 1 63 1 0
## 8 1 3 1 0
## 9 1 51 1 0
## 10 1 49 0 1
y <- data1$daysabs
We will use the following helper functions to extract and visualize posterior predictive distributions.
##
extract_post_pred <- function(stan_fit) {
tidybayes::tidy_draws(stan_fit) %>%
select(.chain, .iteration, .draw, starts_with("y_new")) %>%
gather(key = key, value = value, starts_with("y_new")) %>%
mutate(key = gsub("y_new|\\[|\\]", "", key) %>% as.integer())
}
##
plot_draws <- function(stan_fit, n_sample, data_count = data1_count) {
draw_data <- extract_post_pred(stan_fit)
sample_indices <- sample(seq_len(max(draw_data$.iteration)), size = n_sample)
draw_data %>%
group_by(.chain, .iteration, .draw) %>%
count(value) %>%
filter(.iteration %in% sample_indices) %>%
ggplot(mapping = aes(x = value, y = n, group = interaction(.chain, .iteration, .draw))) +
## Plot random draws from posterior
geom_line(alpha = 0.5, size = 0.1) +
## Include actual data distribution
geom_line(data = data_count, color = "gray", alpha = 0.7, size = 2,
mapping = aes(x = y, group = NULL)) +
facet_wrap( ~ .chain) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
}
This is a Poisson model without any covariates. The MCMC diagnostics are good, but the model is a poor fit, as seen in the discrepancy between the posterior predictive distributions and the data distribution. Here \(a\) and \(b\) are the hyperparameters supplied in the data block. \[\begin{align*} &\text{Prior}\\ \lambda | a, b &\sim Gamma(a, b)\\ \\ &\text{Likelihood}\\ y_{i} | \lambda &\overset{\text{iid}}{\sim} Poisson(\lambda)\\ \end{align*} \]
stan_code_poisson <- biostan::read_stan_file("./bmspe_poisson.stan")
## Warning: replacing previous import 'rstan::loo' by 'rstanarm::loo' when loading 'biostan'
biostan::print_stan_code(stan_code_poisson, section = NULL)
## data {
## /* Hyperparameters*/
## real<lower=0> a;
## real<lower=0> b;
##
## /* Dimensions */
## int<lower=0> N;
## int<lower=0> M;
## /* Design Matrix */
## matrix[N,M] X;
## /* Outcome (a real vector of length n) */
## int<lower=0> y[N];
## }
##
## parameters {
## real<lower=0> lambda;
## }
##
## model {
## /* Prior */
## /* lambda ~ gamma(a, b); */
## /* Explicit contribution to target */
## target += gamma_lpdf(lambda | a, b);
##
## /* Likelihood */
## /* y ~ poisson(lambda); */
## /* Explicit contribution to target */
## for (i in 1:N) {
## target += poisson_lpmf(y[i] | lambda);
## }
## }
##
## generated quantities {
## int<lower=0> y_new[N];
## vector[N] log_lik;
##
## for (i in 1:N) {
## y_new[i] = poisson_rng(lambda);
## log_lik[i] = poisson_lpmf(y[i] | lambda);
## }
## }
##
stan_model_poisson <- stan(model_code = stan_code_poisson,
data = list(a = 10^(-3), b = 10^(-3),
N = nrow(X), M = ncol(X),
X = X,
y = y),
chains = n_cores)
check_hmc_diagnostics(stan_model_poisson)
##
## Divergences:
##
## Tree depth:
##
## Energy:
pars <- c("lambda","lp__")
print(stan_model_poisson, pars = pars)
## Inference for Stan model: 843946174fd41dafb52150aa37e93e57.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## lambda 5.95 0.00 0.14 5.69 5.86 5.95 6.04 6.23 4430 1
## lp__ -1557.93 0.01 0.70 -1559.96 -1558.09 -1557.65 -1557.48 -1557.43 5340 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:20:12 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
pairs(stan_model_poisson, pars = pars)
traceplot(stan_model_poisson, inc_warmup = TRUE, pars = pars)
plot_draws(stan_model_poisson, n_sample = 20)
loo(stan_model_poisson)
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1555.2 92.9
## p_loo 8.3 1.1
## looic 3110.3 185.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
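As a sanity check on the sampler (a sketch, not in the original analysis): the Gamma prior is conjugate to the Poisson likelihood, so the posterior of \(\lambda\) is available in closed form, and its quantiles should closely match the MCMC summary of lambda above. Here y is the outcome vector defined earlier and a0, b0 are the same hyperparameters passed to Stan.
## Closed-form posterior: lambda | y ~ Gamma(a + sum(y), b + N).
a0 <- 10^(-3); b0 <- 10^(-3)
qgamma(c(0.025, 0.5, 0.975), shape = a0 + sum(y), rate = b0 + length(y))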
Including covariates improves the fit somewhat, and the posterior predictive distributions are closer to the data distribution. However, the excess of zeros is still not accounted for well. \[\begin{align*} &\text{Prior}\\ \boldsymbol{\beta} &\sim MVN(\boldsymbol{0}, s\mathbf{I})\\ \\ &\text{Likelihood}\\ \eta_{i} &= \mathbf{X}_{i}^{T}\boldsymbol{\beta}\\ \mu_{i} &= e^{\eta_{i}}\\ y_{i} | \mu_{i} &\overset{\text{ind}}{\sim} Poisson(\mu_{i})\\ \end{align*} \]
stan_code_poisson_covs <- biostan::read_stan_file("./bmspe_poisson_covs.stan")
biostan::print_stan_code(stan_code_poisson_covs, section = NULL)
## data {
## /* Hyperparameters*/
## real<lower=0> s;
##
## /* Dimensions */
## int<lower=0> N;
## int<lower=0> M;
## /* Design Matrix */
## matrix[N,M] X;
## /* Outcome */
## int<lower=0> y[N];
## }
##
## parameters {
## vector[M] beta;
## }
##
## transformed parameters {
## vector[N] eta;
##
## eta = X * beta;
## }
##
## model {
## /* Prior */
## for (j in 1:M) {
## target += normal_lpdf(beta[j] | 0, s);
## }
##
## /* Likelihood */
## /* y_i ~ poisson(exp(eta_i)); */
## for (i in 1:N) {
## target += poisson_log_lpmf(y[i] | eta[i]);
## }
## }
##
## generated quantities {
## int<lower=0> y_new[N];
## vector[N] log_lik;
##
## for (i in 1:N) {
## if (eta[i] > 20) {
##       /* To avoid errors like the one below during warmup. */
## /* [2] " Exception: poisson_log_rng: Log rate parameter is 69.8999, but must be less than 20.7944 (in 'modelb25632c451a8_0ba3e86968d73bc8913ee68b8ce5542b' at line 40)" */
## /* Check posterior predictive. */
## y_new[i] = poisson_log_rng(20);
## } else {
## y_new[i] = poisson_log_rng(eta[i]);
## }
##
## log_lik[i] = poisson_log_lpmf(y[i] | eta[i]);
## }
## }
##
stan_model_poisson_covs <- stan(model_code = stan_code_poisson_covs,
data = list(s = 10,
N = nrow(X), M = ncol(X),
X = X,
y = y),
chains = n_cores)
check_hmc_diagnostics(stan_model_poisson_covs)
##
## Divergences:
##
## Tree depth:
##
## Energy:
pars <- c("beta","lp__")
print(stan_model_poisson_covs, pars = pars)
## Inference for Stan model: 1bf021e5d6a2cfcbeea4a9c4dd4e7110.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.65 0.00 0.06 2.53 2.61 2.65 2.69 2.77 3957 1
## beta[2] -0.01 0.00 0.00 -0.01 -0.01 -0.01 -0.01 -0.01 8260 1
## beta[3] -0.44 0.00 0.06 -0.55 -0.48 -0.44 -0.40 -0.33 3929 1
## beta[4] -1.28 0.00 0.08 -1.44 -1.34 -1.28 -1.23 -1.13 4375 1
## lp__ -1343.58 0.02 1.41 -1347.15 -1344.27 -1343.26 -1342.54 -1341.81 4648 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:21:15 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
pairs(stan_model_poisson_covs, pars = pars)
traceplot(stan_model_poisson_covs, inc_warmup = TRUE, pars = pars)
plot_draws(stan_model_poisson_covs, n_sample = 20)
loo(stan_model_poisson_covs)
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1343.5 74.5
## p_loo 25.1 3.4
## looic 2686.9 149.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
We are using vague priors on the regression coefficients, so the results are similar to the corresponding Frequentist fit.
## Frequentist Poisson fit for comparison
summary(glm(formula = daysabs ~ math + prog, data = data1, family = poisson(link = "log")))
##
## Call:
## glm(formula = daysabs ~ math + prog, family = poisson(link = "log"),
## data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2597 -2.2038 -0.9193 0.6511 7.4233
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.651974 0.060736 43.664 < 2e-16 ***
## math -0.006808 0.000931 -7.313 2.62e-13 ***
## progAcademic -0.439897 0.056672 -7.762 8.35e-15 ***
## progVocational -1.281364 0.077886 -16.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2217.7 on 313 degrees of freedom
## Residual deviance: 1774.0 on 310 degrees of freedom
## AIC: 2665.3
##
## Number of Fisher Scoring iterations: 5
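To make the comparison above concrete, here is a small sketch placing the posterior means next to the maximum likelihood estimates (fit_glm_pois is a hypothetical name introduced only for this check).
## Side-by-side comparison of Bayesian posterior means and frequentist MLEs.
fit_glm_pois <- glm(daysabs ~ math + prog, data = data1, family = poisson(link = "log"))
cbind(bayes_mean = summary(stan_model_poisson_covs, pars = "beta")$summary[, "mean"],
      freq_mle   = coef(fit_glm_pois))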
The zero-inflated Poisson (ZIP) model is a two-part model that accommodates two different types of zeros: structural zeros, which come from a subpopulation that can only have zeros, and Poisson zeros, which come from a subpopulation that can have non-zero values but happened to have zeros. In this example of days absent from school, a structural-zero subpopulation may not be very plausible. \[\begin{align*} &\text{Prior}\\ \boldsymbol{\beta} &\sim MVN(\boldsymbol{0}, \text{diag}(s))\\ \boldsymbol{\beta}_{\theta} &\sim MVN(\boldsymbol{0}, \text{diag}(s_{\theta}))\\ \\ &\text{Likelihood}\\ \eta_{\theta,i} &= \mathbf{X}_{i}^{T}\boldsymbol{\beta}_{\theta}\\ \mu_{\theta,i} &= \text{expit}(\eta_{\theta,i})\\ z_{i} | \mu_{\theta,i} &\overset{\text{ind}}{\sim} Bernoulli(\mu_{\theta,i})\\ \\ \eta_{i} &= \mathbf{X}_{i}^{T}\boldsymbol{\beta}\\ \mu_{i} &= e^{\eta_{i}}\\ y_{i} | \mu_{i}, z_{i} &\overset{\text{ind}}{\sim} \begin{cases} 0 & \text{if } z_{i} = 1\\ Poisson(\mu_{i}) & \text{if } z_{i} = 0 \end{cases}\\ \end{align*} \]
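For intuition, here is a minimal R sketch of the per-observation ZIP log-likelihood after marginalizing the latent indicator \(z_{i}\); it mirrors the log_sum_exp construction used in the Stan model below (dzip_log is a hypothetical helper introduced only for illustration).
## theta: structural-zero probability; mu: Poisson mean.
dzip_log <- function(y, theta, mu) {
  ifelse(y == 0,
         ## Zero case: either a structural zero or a Poisson zero.
         log(theta + (1 - theta) * dpois(0, mu)),
         ## Non-zero case: must not be a structural zero.
         log(1 - theta) + dpois(y, mu, log = TRUE))
}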
stan_code_poisson_covs_zip <- biostan::read_stan_file("./bmspe_poisson_covs_zip.stan")
biostan::print_stan_code(stan_code_poisson_covs_zip, section = NULL)
## /* https://discourse.mc-stan.org/t/learning-how-to-write-a-zero-inflated-poisson-model-with-stan/6726 */
## data {
## /* Dimensions */
## int<lower=0> N;
## int<lower=0> M;
## /* Design Matrix */
## matrix[N,M] X;
## /* Outcome */
## int<lower=0> y[N];
## /* Hyperparameters*/
## real<lower=0> s[M];
## real<lower=0> s_theta[M];
## }
##
## parameters {
## vector[M] beta;
## vector[M] beta_theta;
## }
##
## transformed parameters {
## vector[N] eta;
## vector[N] eta_theta;
##
## eta = X * beta;
## eta_theta = X * beta_theta;
## }
##
## model {
## /* Prior */
## for (j in 1:M) {
## target += normal_lpdf(beta[j] | 0, s[j]);
## target += normal_lpdf(beta_theta[j] | 0, s_theta[j]);
## }
##
## /* Likelihood */
## /* https://github.com/paul-buerkner/brms/blob/master/inst/chunks/fun_zero_inflated_poisson.stan */
## for (i in 1:N) {
## if (y[i] == 0) {
## /* Zero case */
## target += log_sum_exp(/* Structural zero */
## bernoulli_logit_lpmf(1 | eta_theta[i]),
## /* Poisson zero */
## bernoulli_logit_lpmf(0 | eta_theta[i]) +
## poisson_log_lpmf(0 | eta[i]));
## } else {
## /* Non-zero case */
## /* First term means not structural zero. */
## target += bernoulli_logit_lpmf(0 | eta_theta[i]) +
## /* y[i] is relevant only here. */
## poisson_log_lpmf(y[i] | eta[i]);
## }
## }
## }
##
## generated quantities {
## int y_new[N];
## vector[N] log_lik;
##
## for (i in 1:N) {
##
## if (bernoulli_logit_rng(eta_theta[i]) == 1) {
## /* Structural zero */
## y_new[i] = 0;
## } else {
## /* Not structural zero */
## if (eta[i] > 20) {
##       /* To avoid errors like the one below during warmup. */
## /* Check posterior predictive. */
## y_new[i] = poisson_log_rng(20);
## } else {
## y_new[i] = poisson_log_rng(eta[i]);
## }
## }
##
##
## if (y[i] == 0) {
## /* Zero case */
## log_lik[i] = log_sum_exp(/* Structural zero */
## bernoulli_logit_lpmf(1 | eta_theta[i]),
## /* Poisson zero */
## bernoulli_logit_lpmf(0 | eta_theta[i]) +
## poisson_log_lpmf(0 | eta[i]));
## } else {
## /* Non-zero case */
## /* First term means not structural zero. */
## log_lik[i] = bernoulli_logit_lpmf(0 | eta_theta[i]) +
## /* y[i] is relevant only here. */
## poisson_log_lpmf(y[i] | eta[i]);
## }
##
## }
## }
##
stan_model_poisson_covs_zip <- stan(model_code = stan_code_poisson_covs_zip,
## Regularize binomial part to avoid divergent iterations.
data = list(s = rep(10,ncol(X)), s_theta = rep(10,ncol(X)),
N = nrow(X), M = ncol(X),
X = X,
y = y),
chains = n_cores)
## Warning: There were 601 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 1747 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
check_hmc_diagnostics(stan_model_poisson_covs_zip)
##
## Divergences:
##
## Tree depth:
##
## Energy:
pars <- c("beta","beta_theta","lp__")
print(stan_model_poisson_covs_zip, pars = pars)
## Inference for Stan model: d0f831067dd936a77b47661b4f6b0c98.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.61 0.01 0.07 2.48 2.56 2.61 2.65 2.75 30 1.18
## beta[2] -0.01 0.00 0.00 -0.01 -0.01 -0.01 0.00 0.00 13 1.37
## beta[3] -0.34 0.02 0.08 -0.50 -0.39 -0.33 -0.28 -0.20 13 1.45
## beta[4] -1.03 0.06 0.16 -1.38 -1.14 -0.98 -0.91 -0.80 8 2.13
## beta_theta[1] -6.16 1.34 5.39 -14.95 -9.28 -6.81 -4.62 8.00 16 1.30
## beta_theta[2] -2.04 1.43 4.35 -14.99 -0.10 0.01 0.02 0.03 9 1.77
## beta_theta[3] 6.15 0.24 4.00 0.83 3.41 5.38 8.14 15.77 275 1.04
## beta_theta[4] 4.81 1.17 6.32 -12.97 3.39 5.48 8.05 14.86 29 1.15
## lp__ -1254.89 22.69 55.63 -1354.10 -1262.35 -1223.35 -1221.72 -1220.01 6 27.49
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:24:16 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
pairs(stan_model_poisson_covs_zip, pars = pars)
traceplot(stan_model_poisson_covs_zip, inc_warmup = TRUE, pars = pars)
plot_draws(stan_model_poisson_covs_zip, n_sample = 20)
loo(stan_model_poisson_covs_zip)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1320.7 69.0
## p_loo 147.7 15.1
## looic 2641.4 138.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 312 99.4% 2
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 2 0.6% 0
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
The model did not converge with vague priors on both sets of coefficients. The pairs plot shows straight-line patterns among the Bernoulli (zero-inflation) model coefficients. As seen in the barplots of the data, the General program does not have any individuals with zero days absent from school, which may be causing a complete-separation issue. Treating this as a data sparsity problem, we will examine a model that regularizes the non-intercept coefficients of the Bernoulli model more heavily.
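Before refitting, we can verify the claimed sparsity directly with a quick sketch (General is expected to have no zeros).
## Number of zero-absence students by program.
data1 %>%
    group_by(prog) %>%
    summarize(n = n(), n_zero = sum(daysabs == 0))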
stan_model_poisson_covs_zip2 <- stan(model_code = stan_code_poisson_covs_zip,
## Regularize binomial part to avoid divergent iterations.
data = list(s = rep(10,ncol(X)), s_theta = c(10,rep(2, ncol(X)-1)),
N = nrow(X), M = ncol(X),
X = X,
y = y),
chains = n_cores)
check_hmc_diagnostics(stan_model_poisson_covs_zip2)
##
## Divergences:
##
## Tree depth:
##
## Energy:
pars <- c("beta","beta_theta","lp__")
print(stan_model_poisson_covs_zip2, pars = pars)
## Inference for Stan model: d0f831067dd936a77b47661b4f6b0c98.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.59 0.00 0.06 2.47 2.55 2.59 2.63 2.70 6940 1
## beta[2] -0.01 0.00 0.00 -0.01 -0.01 -0.01 0.00 0.00 12619 1
## beta[3] -0.30 0.00 0.06 -0.42 -0.34 -0.30 -0.26 -0.19 6791 1
## beta[4] -0.94 0.00 0.08 -1.10 -0.99 -0.94 -0.89 -0.78 7814 1
## beta_theta[1] -4.40 0.01 0.91 -6.36 -4.98 -4.34 -3.75 -2.77 5036 1
## beta_theta[2] 0.01 0.00 0.01 0.00 0.01 0.01 0.02 0.03 10619 1
## beta_theta[3] 1.87 0.01 0.85 0.38 1.27 1.81 2.40 3.73 4965 1
## beta_theta[4] 2.79 0.01 0.85 1.33 2.19 2.74 3.32 4.66 4917 1
## lp__ -1219.84 0.03 2.01 -1224.74 -1220.93 -1219.51 -1218.37 -1216.97 4796 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:26:15 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
pairs(stan_model_poisson_covs_zip2, pars = pars)
traceplot(stan_model_poisson_covs_zip2, inc_warmup = TRUE, pars = pars)
plot_draws(stan_model_poisson_covs_zip2, n_sample = 20)
loo(stan_model_poisson_covs_zip2)
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1210.2 64.2
## p_loo 25.4 3.2
## looic 2420.5 128.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
The regularized model converged. The narrow linear patterns among the Bernoulli model coefficients are probably due to strong posterior correlation. Being in either the Academic program (coefficient 3) or the Vocational program (coefficient 4) is a strong predictor of having zero absent days, so when one coefficient is large the other tends to be large as well, and the intercept (coefficient 1) must then be very negative to compensate.
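We can quantify the linear patterns seen in the pairs plot by examining the posterior correlations of the Bernoulli-part coefficients; a minimal sketch:
## Posterior correlation matrix of the zero-inflation coefficients.
beta_theta_draws <- as.matrix(stan_model_poisson_covs_zip2, pars = "beta_theta")
round(cor(beta_theta_draws), 2)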
## Frequentist ZIP fit for comparison
## https://stats.idre.ucla.edu/r/dae/zip/
summary(pscl::zeroinfl(formula = daysabs ~ math + prog | math + prog, data = data1))
##
## Call:
## pscl::zeroinfl(formula = daysabs ~ math + prog | math + prog, data = data1)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -2.3410 -1.1702 -0.6026 0.5864 7.3907
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.5879178 0.0614173 42.137 < 2e-16 ***
## math -0.0052060 0.0009291 -5.603 0.0000000210 ***
## progAcademic -0.3040946 0.0567438 -5.359 0.0000000836 ***
## progVocational -0.9397286 0.0794916 -11.822 < 2e-16 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -19.203567 1705.841440 -0.011 0.991
## math 0.012639 0.006926 1.825 0.068 .
## progAcademic 16.737788 1705.841413 0.010 0.992
## progVocational 17.660593 1705.841412 0.010 0.992
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 13
## Log-likelihood: -1192 on 8 Df
As expected, the non-regularized Frequentist fit breaks down, with very large standard errors for the Bernoulli (zero-inflation) model coefficients.
Next we consider a Poisson model with a multiplicative, individual-specific gamma random effect \(\gamma_{i}\), which accommodates overdispersion. \[\begin{align*} &\text{Prior}\\ \boldsymbol{\beta} &\sim MVN(\boldsymbol{0}, s\mathbf{I})\\ a_{\gamma} &\sim Gamma(a, b)\\ \\ &\text{Likelihood}\\ \eta_{i} &= \mathbf{X}_{i}^{T}\boldsymbol{\beta}\\ \mu_{i} &= e^{\eta_{i}}\\ \gamma_{i} | a_{\gamma} &\overset{\text{iid}}{\sim} Gamma(1/a_{\gamma},1/a_{\gamma}), \quad E[\gamma_{i}] = 1,\ Var(\gamma_{i}) = a_{\gamma}\\ y_{i} | \gamma_{i},\mu_{i} &\overset{\text{ind}}{\sim} Poisson(\gamma_{i} \mu_{i})\\ \end{align*} \]
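A quick simulation sketch (a_gamma_demo is a hypothetical value chosen only for illustration) confirms that a Gamma(1/a_gamma, 1/a_gamma) random effect has mean 1 and variance a_gamma, so it adds overdispersion without shifting the mean.
## Simulate the random effect distribution and check its first two moments.
a_gamma_demo <- 0.5
g <- rgamma(1e5, shape = 1 / a_gamma_demo, rate = 1 / a_gamma_demo)
c(mean = mean(g), var = var(g))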
stan_code_poisson_covs_gamma <- biostan::read_stan_file("./bmspe_poisson_covs_gamma.stan")
biostan::print_stan_code(stan_code_poisson_covs_gamma, section = NULL)
## data {
## /* Hyperparameters*/
## real<lower=0> a;
## real<lower=0> b;
## real<lower=0> s;
##
## /* Dimensions */
## int<lower=0> N;
## int<lower=0> M;
## /* Design Matrix */
## matrix[N,M] X;
## /* Outcome */
## int<lower=0> y[N];
## }
##
## parameters {
## vector[M] beta;
## vector<lower=0>[N] gamma;
## real<lower=0> a_gamma;
## }
##
## transformed parameters {
## vector[N] eta;
## vector<lower=0>[N] mu;
##
## eta = X * beta;
## mu = exp(eta);
## }
##
## model {
## /* Prior */
## for (j in 1:M) {
## /* beta_j ~ N(0, s) */
## target += normal_lpdf(beta[j] | 0, s);
## }
## /* a_gamma ~ Gamma(a, b) */
## target += gamma_lpdf(a_gamma | a, b);
##
## /* Likelihood */
## for (i in 1:N) {
##     /* gamma_i ~ Gamma(1/a_gamma, 1/a_gamma) */
## /* E[gamma_i] = 1 must be met for identifiability. */
## target += gamma_lpdf(gamma[i] | 1/a_gamma, 1/a_gamma);
## /* y_i ~ poisson(gamma_i * mu_i); */
## target += poisson_lpmf(y[i] | gamma[i] * mu[i]);
## }
## }
##
## generated quantities {
## int y_new[N];
## vector[N] log_lik;
##
## for (i in 1:N) {
## if (gamma[i] * mu[i] > 1e+09) {
##     /* To avoid errors like the one below during warmup. */
## /* Exception: poisson_rng: Rate parameter is 1.92222e+40, but must be less than 1.07374e+09 */
## /* Check posterior predictive. */
## y_new[i] = -1;
## } else {
## y_new[i] = poisson_rng(gamma[i] * mu[i]);
## }
##
## log_lik[i] = gamma_lpdf(gamma[i] | 1/a_gamma, 1/a_gamma) + poisson_lpmf(y[i] | gamma[i] * mu[i]);
## }
## }
##
stan_model_poisson_covs_gamma <- stan(model_code = stan_code_poisson_covs_gamma,
data = list(a = 10^(-3), b = 10^(-3),
s = 10,
N = nrow(X), M = ncol(X),
X = X,
y = y),
chains = n_cores)
check_hmc_diagnostics(stan_model_poisson_covs_gamma)
##
## Divergences:
##
## Tree depth:
##
## Energy:
pars <- c("beta","a_gamma","lp__")
print(stan_model_poisson_covs_gamma, pars = pars)
## Inference for Stan model: 6c9eb83e4c4e607ea8864804bdd7b685.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.63 0.00 0.20 2.24 2.49 2.62 2.76 3.03 2494 1.01
## beta[2] -0.01 0.00 0.00 -0.01 -0.01 -0.01 0.00 0.00 3709 1.00
## beta[3] -0.45 0.00 0.19 -0.82 -0.57 -0.44 -0.32 -0.09 2387 1.00
## beta[4] -1.28 0.00 0.21 -1.70 -1.42 -1.28 -1.14 -0.88 2970 1.00
## a_gamma 0.99 0.00 0.10 0.80 0.92 0.99 1.06 1.21 5969 1.00
## lp__ -1118.41 0.27 15.21 -1149.22 -1128.47 -1118.26 -1108.09 -1089.71 3159 1.00
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:28:26 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
pairs(stan_model_poisson_covs_gamma, pars = pars)
traceplot(stan_model_poisson_covs_gamma, inc_warmup = TRUE, pars = pars)
plot_draws(stan_model_poisson_covs_gamma, n_sample = 20)
loo(stan_model_poisson_covs_gamma)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
## Warning in log(z): NaNs produced
## Warning in log(z): NaNs produced
## Warning in log(z): NaNs produced
## Warning in log(z): NaNs produced
## Warning in log(z): NaNs produced
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1224.5 25.1
## p_loo 370.3 6.0
## looic 2448.9 50.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 0 0.0% <NA>
## (0.5, 0.7] (ok) 4 1.3% 151
## (0.7, 1] (bad) 223 71.0% 13
## (1, Inf) (very bad) 87 27.7% 5
## See help('pareto-k-diagnostic') for details.
The LOO diagnostics for this model are poor; with one latent random effect \(\gamma_{i}\) per observation, each observation strongly influences its own parameters (note the very large p_loo), so importance-sampling LOO is unreliable here. If we marginalize over the latent individual-specific random effect \(\gamma_{i}\), we obtain the negative binomial model. In the following formulation, \(a_{\gamma}\) should have the same interpretation as in the previous model. \[\begin{align*} &\text{Prior}\\ \boldsymbol{\beta} &\sim MVN(\boldsymbol{0}, s\mathbf{I})\\ a_{\gamma} &\sim Gamma(a, b)\\ \phi &= \frac{1}{a_{\gamma}}\\ \\ &\text{Likelihood}\\ \eta_{i} &= \mathbf{X}_{i}^{T}\boldsymbol{\beta}\\ \mu_{i} &= e^{\eta_{i}}\\ y_{i} | \mu_{i}, \phi &\overset{\text{ind}}{\sim} NegBin(\mu_{i}, \phi)\\ \\ &\text{Stan definition}\\ \text{NegBinomial2}(y_{i} | \mu_{i}, \phi) &= \frac{\Gamma(y_{i} + \phi)}{y_{i}!\Gamma(\phi)} \left( \frac{\mu_{i}}{\mu_{i}+\phi} \right)^{y_{i}} \left( \frac{\phi}{\mu_{i}+\phi} \right)^{\phi}\\ \end{align*} \]
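Under this parameterization \(Var(y_{i}) = \mu_{i} + \mu_{i}^{2}/\phi\), so \(\phi = 1/a_{\gamma}\) controls the overdispersion (smaller \(\phi\) means more overdispersion). A small simulation sketch with hypothetical values mu_demo and phi_demo:
## rnbinom's size argument corresponds to phi in the NegBinomial2 parameterization.
mu_demo <- 5; phi_demo <- 1
y_sim <- rnbinom(1e5, mu = mu_demo, size = phi_demo)
c(empirical_var = var(y_sim), theoretical_var = mu_demo + mu_demo^2 / phi_demo)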
## http://rstudio-pubs-static.s3.amazonaws.com/34099_2e35c3966ef548c2918d5b6c2146bfd1.html
stan_code_negbin_covs <- biostan::read_stan_file("./bmspe_negbin_covs.stan")
biostan::print_stan_code(stan_code_negbin_covs, section = NULL)
## data {
## /* Hyperparameters*/
## real<lower=0> a;
## real<lower=0> b;
## real<lower=0> s;
##
## /* Dimensions */
## int<lower=0> N;
## int<lower=0> M;
## /* Design Matrix */
## matrix[N,M] X;
## /* Outcome */
## int<lower=0> y[N];
## }
##
## parameters {
## vector[M] beta;
## real<lower=0> a_gamma;
## }
##
## transformed parameters {
## vector[N] eta;
## real<lower=0> phi;
##
## eta = X * beta;
## phi = 1 / a_gamma;
## }
##
## model {
## /* Prior */
## for (j in 1:M) {
## target += normal_lpdf(beta[j] | 0, s);
## }
## target += gamma_lpdf(a_gamma | a, b);
##
## /* Likelihood */
## /* y_i ~ poisson(exp(eta_i)); */
## for (i in 1:N) {
## /* https://mc-stan.org/docs/2_18/functions-reference/nbalt.html */
## target += neg_binomial_2_lpmf(y[i] | exp(eta[i]), phi);
## }
## }
##
## generated quantities {
## int y_new[N];
## vector[N] log_lik;
##
## for (i in 1:N) {
## /* eta[i] is the log(mean). */
## if (eta[i] > 15) {
##     /* To avoid errors like the one below during warmup. */
## /* neg_binomial_2_rng: Random number that came from gamma distribution is 3.02668e+39, but must be less than 1.07374e+09 */
## /* https://groups.google.com/forum/#!topic/stan-users/4g2hbwtRELQ */
## /* Check posterior predictive for anomaly. */
## y_new[i] = -1;
## } else {
## y_new[i] = neg_binomial_2_rng(exp(eta[i]), phi);
## }
##
## log_lik[i] = neg_binomial_2_lpmf(y[i] | exp(eta[i]), phi);
## }
## }
##
stan_model_negbin_covs <- stan(model_code = stan_code_negbin_covs,
data = list(a = 10^(-3), b = 10^(-3),
s = 10,
N = nrow(X), M = ncol(X),
X = X,
y = y),
chains = n_cores)
check_hmc_diagnostics(stan_model_negbin_covs)
##
## Divergences:
##
## Tree depth:
##
## Energy:
pars <- c("beta","a_gamma","phi","lp__")
print(stan_model_negbin_covs, pars = pars)
## Inference for Stan model: e98734dc2427bc4c0dad3de78b51228b.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.63 0.00 0.20 2.25 2.50 2.63 2.77 3.03 4524 1
## beta[2] -0.01 0.00 0.00 -0.01 -0.01 -0.01 0.00 0.00 9593 1
## beta[3] -0.45 0.00 0.18 -0.82 -0.58 -0.45 -0.33 -0.10 4366 1
## beta[4] -1.29 0.00 0.20 -1.70 -1.43 -1.29 -1.16 -0.90 5033 1
## a_gamma 0.99 0.00 0.10 0.81 0.92 0.99 1.06 1.20 8674 1
## phi 1.02 0.00 0.10 0.83 0.95 1.01 1.09 1.23 8491 1
## lp__ -887.96 0.02 1.57 -891.83 -888.76 -887.64 -886.80 -885.89 4827 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:29:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
pairs(stan_model_negbin_covs, pars = pars)
traceplot(stan_model_negbin_covs, inc_warmup = TRUE, pars = pars)
plot_draws(stan_model_negbin_covs, n_sample = 20)
loo(stan_model_negbin_covs)
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -870.6 20.3
## p_loo 4.8 0.6
## looic 1741.1 40.5
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
The posterior for \(a_{\gamma}\) and the posterior predictive distributions are very similar to those from the Poisson model with gamma random effects, empirically supporting the equivalence of these two models.
## Frequentist negative binomial fit for comparison
summary(MASS::glm.nb(formula = daysabs ~ math + prog, data = data1))
##
## Call:
## MASS::glm.nb(formula = daysabs ~ math + prog, data = data1, init.theta = 1.032713156,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1547 -1.0192 -0.3694 0.2285 2.5273
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.615265 0.197460 13.245 < 2e-16 ***
## math -0.005993 0.002505 -2.392 0.0167 *
## progAcademic -0.440760 0.182610 -2.414 0.0158 *
## progVocational -1.278651 0.200720 -6.370 1.89e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.0327) family taken to be 1)
##
## Null deviance: 427.54 on 313 degrees of freedom
## Residual deviance: 358.52 on 310 degrees of freedom
## AIC: 1741.3
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.033
## Std. Err.: 0.106
##
## 2 x log-likelihood: -1731.258
Due to the vague priors, the Frequentist fit is again similar.
The corresponding posterior samples are very similar between these two models.
cat("Poisson with gamma random effects\n")
## Poisson with gamma random effects
print(stan_model_poisson_covs_gamma, pars = c("beta","a_gamma"))
## Inference for Stan model: 6c9eb83e4c4e607ea8864804bdd7b685.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.63 0 0.20 2.24 2.49 2.62 2.76 3.03 2494 1.01
## beta[2] -0.01 0 0.00 -0.01 -0.01 -0.01 0.00 0.00 3709 1.00
## beta[3] -0.45 0 0.19 -0.82 -0.57 -0.44 -0.32 -0.09 2387 1.00
## beta[4] -1.28 0 0.21 -1.70 -1.42 -1.28 -1.14 -0.88 2970 1.00
## a_gamma 0.99 0 0.10 0.80 0.92 0.99 1.06 1.21 5969 1.00
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:28:26 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
cat("Negative binomial\n")
## Negative binomial
print(stan_model_negbin_covs, pars = c("beta","a_gamma"))
## Inference for Stan model: e98734dc2427bc4c0dad3de78b51228b.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.63 0 0.20 2.25 2.50 2.63 2.77 3.03 4524 1
## beta[2] -0.01 0 0.00 -0.01 -0.01 -0.01 0.00 0.00 9593 1
## beta[3] -0.45 0 0.18 -0.82 -0.58 -0.45 -0.33 -0.10 4366 1
## beta[4] -1.29 0 0.20 -1.70 -1.43 -1.29 -1.16 -0.90 5033 1
## a_gamma 0.99 0 0.10 0.81 0.92 0.99 1.06 1.20 8674 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:29:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Figures may be more informative.
both_draws <- bind_rows(tidybayes::tidy_draws(stan_model_poisson_covs_gamma) %>%
select(.chain, .iteration, .draw, starts_with("beta"), a_gamma) %>%
gather(key = key, value = value, starts_with("beta"), a_gamma) %>%
mutate(model = "Poisson Mixed"),
##
tidybayes::tidy_draws(stan_model_negbin_covs) %>%
select(.chain, .iteration, .draw, starts_with("beta"), a_gamma) %>%
gather(key = key, value = value, starts_with("beta"), a_gamma) %>%
mutate(model = "Negative Binomial"))
both_draws %>%
ggplot(mapping = aes(x = value, color = model)) +
geom_density() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_draws %>%
ggplot(mapping = aes(y = value, x = model, color = model)) +
geom_boxplot() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_draws %>%
ggplot(mapping = aes(y = value, x = model, color = model)) +
geom_violin() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_draws %>%
ggplot(mapping = aes(y = value, x = model, color = model)) +
tidybayes::geom_eye() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Posterior predictive distributions can also be compared; they differ somewhat more between the two models than the parameter posteriors do.
both_ppd <- bind_rows(extract_post_pred(stan_model_poisson_covs_gamma) %>%
mutate(model = "Poisson Mixed"),
extract_post_pred(stan_model_negbin_covs) %>%
mutate(model = "Negative Binomial"))
both_ppd %>%
group_by(model, .chain, .iteration, .draw) %>%
count(value) %>%
group_by(model, value) %>%
tidybayes::median_qi(n, .width = c(0.95, 0.80, 0.50)) %>%
ggplot(mapping = aes(x = value, y = n)) +
tidybayes::geom_interval() +
geom_point(data = data1_count,
mapping = aes(x = y, y = n)) +
geom_line(data = data1_count,
mapping = aes(x = y, y = n)) +
scale_color_brewer() +
facet_wrap( ~ model) +
coord_cartesian(xlim = c(0, 50)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_ppd_qi <- both_ppd %>%
group_by(model, .chain, .iteration, .draw) %>%
count(value) %>%
group_by(model, value) %>%
tidybayes::median_qi(n, .width = c(0.95, 0.80, 0.50))
both_ppd_qi %>%
ggplot(mapping = aes(x = value, y = n)) +
geom_line() +
geom_ribbon(data = filter(both_ppd_qi, .width == 0.95),
mapping = aes(ymin = .lower, ymax = .upper),
fill = "gray95") +
geom_ribbon(data = filter(both_ppd_qi, .width == 0.80),
mapping = aes(ymin = .lower, ymax = .upper),
fill = "gray80") +
geom_ribbon(data = filter(both_ppd_qi, .width == 0.50),
mapping = aes(ymin = .lower, ymax = .upper),
fill = "gray50") +
geom_point(data = data1_count,
mapping = aes(x = y, y = n)) +
geom_line(data = data1_count,
mapping = aes(x = y, y = n)) +
facet_wrap( ~ model) +
coord_cartesian(xlim = c(0, 50)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Now we compare the goodness of fit for the Poisson model and negative binomial model.
cat("Poisson\n")
## Poisson
print(stan_model_poisson_covs, pars = c("beta"))
## Inference for Stan model: 1bf021e5d6a2cfcbeea4a9c4dd4e7110.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.65 0 0.06 2.53 2.61 2.65 2.69 2.77 3957 1
## beta[2] -0.01 0 0.00 -0.01 -0.01 -0.01 -0.01 -0.01 8260 1
## beta[3] -0.44 0 0.06 -0.55 -0.48 -0.44 -0.40 -0.33 3929 1
## beta[4] -1.28 0 0.08 -1.44 -1.34 -1.28 -1.23 -1.13 4375 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:21:15 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
cat("Negative binomial\n")
## Negative binomial
print(stan_model_negbin_covs, pars = c("beta","a_gamma"))
## Inference for Stan model: e98734dc2427bc4c0dad3de78b51228b.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta[1] 2.63 0 0.20 2.25 2.50 2.63 2.77 3.03 4524 1
## beta[2] -0.01 0 0.00 -0.01 -0.01 -0.01 0.00 0.00 9593 1
## beta[3] -0.45 0 0.18 -0.82 -0.58 -0.45 -0.33 -0.10 4366 1
## beta[4] -1.29 0 0.20 -1.70 -1.43 -1.29 -1.16 -0.90 5033 1
## a_gamma 0.99 0 0.10 0.81 0.92 0.99 1.06 1.20 8674 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jan 1 07:29:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
As seen here, the posterior means are very similar for the corresponding parameters. Again, figures may be more informative.
both_draws <- bind_rows(tidybayes::tidy_draws(stan_model_poisson_covs) %>%
select(.chain, .iteration, .draw, starts_with("beta")) %>%
gather(key = key, value = value, starts_with("beta")) %>%
mutate(model = "Poisson"),
##
tidybayes::tidy_draws(stan_model_negbin_covs) %>%
select(.chain, .iteration, .draw, starts_with("beta"), a_gamma) %>%
gather(key = key, value = value, starts_with("beta"), a_gamma) %>%
mutate(model = "Negative Binomial"))
both_draws %>%
ggplot(mapping = aes(x = value, color = model)) +
geom_density() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_draws %>%
ggplot(mapping = aes(y = value, x = model, color = model)) +
geom_boxplot() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_draws %>%
ggplot(mapping = aes(y = value, x = model, color = model)) +
geom_violin() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_draws %>%
ggplot(mapping = aes(y = value, x = model, color = model)) +
tidybayes::geom_eye() +
facet_wrap( ~ key, scales = "free") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
Now we can better appreciate how much wider the negative binomial posteriors are for the corresponding parameters.
Posterior predictive distributions can also be compared.
both_ppd <- bind_rows(extract_post_pred(stan_model_poisson_covs) %>%
mutate(model = "Poisson"),
extract_post_pred(stan_model_negbin_covs) %>%
mutate(model = "Negative Binomial"))
both_ppd %>%
group_by(model, .chain, .iteration, .draw) %>%
count(value) %>%
group_by(model, value) %>%
tidybayes::median_qi(n, .width = c(0.95, 0.80, 0.50)) %>%
ggplot(mapping = aes(x = value, y = n)) +
tidybayes::geom_interval() +
geom_point(data = data1_count,
mapping = aes(x = y, y = n)) +
geom_line(data = data1_count,
mapping = aes(x = y, y = n)) +
scale_color_brewer() +
facet_wrap( ~ model) +
coord_cartesian(xlim = c(0, 50)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
both_ppd_qi <- both_ppd %>%
group_by(model, .chain, .iteration, .draw) %>%
count(value) %>%
group_by(model, value) %>%
tidybayes::median_qi(n, .width = c(0.95, 0.80, 0.50))
both_ppd_qi %>%
ggplot(mapping = aes(x = value, y = n)) +
geom_line() +
geom_ribbon(data = filter(both_ppd_qi, .width == 0.95),
mapping = aes(ymin = .lower, ymax = .upper),
fill = "gray95") +
geom_ribbon(data = filter(both_ppd_qi, .width == 0.80),
mapping = aes(ymin = .lower, ymax = .upper),
fill = "gray80") +
geom_ribbon(data = filter(both_ppd_qi, .width == 0.50),
mapping = aes(ymin = .lower, ymax = .upper),
fill = "gray50") +
geom_point(data = data1_count,
mapping = aes(x = y, y = n)) +
geom_line(data = data1_count,
mapping = aes(x = y, y = n)) +
facet_wrap( ~ model) +
coord_cartesian(xlim = c(0, 50)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.key = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_blank())
In this case, the Poisson model clearly fits the data poorly. We can compare these models more formally using WAIC.
cat("WAIC Poisson")
## WAIC Poisson
waic_stan_model_poisson_covs <- loo::waic(loo::extract_log_lik(stan_model_poisson_covs))
## Warning: 18 (5.7%) p_waic estimates greater than 0.4. We recommend trying loo instead.
waic_stan_model_poisson_covs
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_waic -1343.5 74.5
## p_waic 25.1 3.4
## waic 2686.9 149.1
## Warning: 18 (5.7%) p_waic estimates greater than 0.4. We recommend trying loo instead.
cat("WAIC negative binomial")
## WAIC negative binomial
waic_stan_model_negbin_covs <- loo::waic(loo::extract_log_lik(stan_model_negbin_covs))
waic_stan_model_negbin_covs
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_waic -870.6 20.3
## p_waic 4.8 0.6
## waic 1741.1 40.5
cat("WAIC comparison")
## WAIC comparison
loo::compare(waic_stan_model_poisson_covs,
waic_stan_model_negbin_covs)
## elpd_diff se
## 472.9 61.2
The expected log pointwise predictive density (elpd) is higher for the negative binomial model by 472.9 (SE 61.2), roughly 7.7 standard errors from zero, indicating a clearly better fit.
We can also use approximate leave-one-out cross-validation (LOO).
cat("LOO Poisson")
## LOO Poisson
loo_stan_model_poisson_covs <- loo::loo(loo::extract_log_lik(stan_model_poisson_covs))
## Warning: Relative effective sample sizes ('r_eff' argument) not specified.
## For models fit with MCMC, the reported PSIS effective sample sizes and
## MCSE estimates will be over-optimistic.
loo_stan_model_poisson_covs
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1343.5 74.5
## p_loo 25.1 3.4
## looic 2686.9 149.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
cat("LOO negative binomial")
## LOO negative binomial
loo_stan_model_negbin_covs <- loo::loo(loo::extract_log_lik(stan_model_negbin_covs))
## Warning: Relative effective sample sizes ('r_eff' argument) not specified.
## For models fit with MCMC, the reported PSIS effective sample sizes and
## MCSE estimates will be over-optimistic.
loo_stan_model_negbin_covs
##
## Computed from 12000 by 314 log-likelihood matrix
##
## Estimate SE
## elpd_loo -870.6 20.3
## p_loo 4.8 0.6
## looic 1741.1 40.5
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
cat("LOO comparison")
## LOO comparison
loo::compare(loo_stan_model_poisson_covs,
loo_stan_model_negbin_covs)
## elpd_diff se
## 472.9 61.2
The LOO comparison gives essentially the same result as the WAIC comparison in this case.
print(sessionInfo())
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 loo_2.0.0 bayesplot_1.6.0 rstan_2.18.1 StanHeaders_2.18.0
## [6] forcats_0.3.0 stringr_1.3.1 dplyr_0.7.7 purrr_0.2.5 readr_1.1.1
## [11] tidyr_0.8.2 tibble_1.4.2 ggplot2_3.1.0 tidyverse_1.2.1 doRNG_1.7.1
## [16] rngtools_1.3.1 pkgmaker_0.27 registry_0.5 doParallel_1.0.14 iterators_1.0.10
## [21] foreach_1.4.4 knitr_1.20
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 colorspace_1.3-2 ggridges_0.5.1 rsconnect_0.8.8
## [5] rprojroot_1.3-2 ggstance_0.3.1 markdown_0.8 base64enc_0.1-3
## [9] rstudioapi_0.8 svUnit_0.7-12 DT_0.4 fansi_0.4.0
## [13] lubridate_1.7.4 xml2_1.2.0 codetools_0.2-15 splines_3.5.1
## [17] pscl_1.5.2 shinythemes_1.1.1 jsonlite_1.5 nloptr_1.2.1
## [21] broom_0.5.0 shiny_1.1.0 compiler_3.5.1 httr_1.3.1
## [25] backports_1.1.2 assertthat_0.2.0 Matrix_1.2-14 lazyeval_0.2.1
## [29] cli_1.0.1 later_0.7.5 htmltools_0.3.6 prettyunits_1.0.2
## [33] tools_3.5.1 igraph_1.2.2 coda_0.19-2 gtable_0.2.0
## [37] glue_1.3.0 reshape2_1.4.3 Rcpp_0.12.19 cellranger_1.1.0
## [41] nlme_3.1-137 crosstalk_1.0.0 ps_1.2.0 lme4_1.1-18-1
## [45] rvest_0.3.2 mime_0.6 miniUI_0.1.1.1 tidybayes_1.0.3
## [49] gtools_3.8.1 MASS_7.3-51 zoo_1.8-4 scales_1.0.0
## [53] rstanarm_2.18.1 colourpicker_1.0 hms_0.4.2 promises_1.0.1
## [57] inline_0.3.15 RColorBrewer_1.1-2 shinystan_2.5.0 yaml_2.2.0
## [61] gridExtra_2.3 stringi_1.2.4 dygraphs_1.1.1.6 pkgbuild_1.0.2
## [65] bibtex_0.4.2 rlang_0.3.0.1 pkgconfig_2.0.2 matrixStats_0.54.0
## [69] evaluate_0.12 lattice_0.20-35 bindr_0.1.1 splines2_0.2.8
## [73] rstantools_1.5.1 htmlwidgets_1.3 labeling_0.3 processx_3.2.0
## [77] tidyselect_0.2.5 biostan_0.1.0 plyr_1.8.4 magrittr_1.5
## [81] R6_2.3.0 pillar_1.3.0 haven_1.1.2 withr_2.1.2
## [85] xts_0.11-1 survival_2.43-1 modelr_0.1.2 crayon_1.3.4
## [89] arrayhelpers_1.0-20160527 KernSmooth_2.23-15 utf8_1.1.4 rmarkdown_1.10
## [93] grid_3.5.1 readxl_1.1.0 callr_3.0.0 threejs_0.3.1
## [97] digest_0.6.18 xtable_1.8-3 httpuv_1.4.5 stats4_3.5.1
## [101] munsell_0.5.0 shinyjs_1.0
## Record execution time and multicore use
end_time <- Sys.time()
diff_time <- difftime(end_time, start_time, units = "auto")
cat("Started ", as.character(start_time), "\n",
"Finished ", as.character(end_time), "\n",
"Time difference of ", diff_time, " ", attr(diff_time, "units"), "\n",
"Used ", foreach::getDoParWorkers(), " cores\n",
"Used ", foreach::getDoParName(), " as backend\n",
sep = "")
## Started 2019-01-01 07:19:29
## Finished 2019-01-01 07:31:38
## Time difference of 12.14532 mins
## Used 12 cores
## Used doParallelMC as backend