Compile the model:
model <- "
// negative binomial parameterized as eta (log(mu)) and dispersion (phi)
// see p286 in stan-reference-2.4.0.pdf
// a basic GLM example
data {
int<lower=1> N; // rows of data
vector[N] x; // predictor
int<lower=0> y[N]; // response
}
parameters {
real<lower=0> phi; // neg. binomial dispersion parameter
real b0; // intercept
real b1; // slope
}
model {
// priors:
phi ~ cauchy(0, 3);
b0 ~ normal(0, 5);
b1 ~ normal(0, 5);
// data model:
y ~ neg_binomial_2_log(b0 + b1 * x, phi);
}
"
write(model, file = "negbin-glm.stan")
sm <- stan_model("negbin-glm.stan")
##
## TRANSLATING MODEL 'negbin-glm' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'negbin-glm' NOW.
Simulate data:
set.seed(123)
N <- 70
phi <- 1.2
b0 <- -0.5
b1 <- 0.4
x <- seq(1, 10, length.out = N)
y <- rnbinom(N, size = phi, mu = exp(b0 + (x - mean(x)) * b1))
plot(x, y)
Fit the model:
m <- sampling(sm, data = list(N = N, y = y, x = x - mean(x)),
pars = c("b0", "b1", "phi"),
iter = 500, chains = 4)
m
## Inference for Stan model: negbin-glm.
## 4 chains, each with iter=500; warmup=250; thin=1;
## post-warmup draws per chain=250, total post-warmup draws=1000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## b0 -0.88 0.01 0.26 -1.40 -1.03 -0.86 -0.70 -0.40 372 1.01
## b1 0.41 0.01 0.10 0.23 0.34 0.41 0.48 0.61 355 1.02
## phi 1.68 0.10 1.64 0.52 0.96 1.32 1.91 4.64 275 1.01
## lp__ -44.34 0.08 1.34 -47.72 -44.93 -43.96 -43.39 -42.81 294 1.00
##
## Samples were drawn using NUTS(diag_e) at Sun Oct 19 00:55:57 2014.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Plot the output:
rstan::traceplot(m, inc_warmup = FALSE)
e <- extract(m, pars = c("b0", "b1", "phi"))
true_pars <- c(b0 = b0, b1 = b1, phi = phi)
x_cent <- x - mean(x)
m_mass <- MASS::glm.nb(y ~ x_cent)
coefs_mass <- c(coef(m_mass), summary(m_mass)$theta)
par(mfrow = c(1, 3))
for(i in 1:3) {
if(i %in% 1:2) {
plot(density(e[[i]]), main = names(true_pars)[i])
} else {
plot(density(e[[i]]), main = names(true_pars)[i], log = "x")
}
abline(v = true_pars[i], lwd = 2, col = "grey", lty = 2)
abline(v = coefs_mass[i], lwd = 3, col = "red")
}
## Warning: 3 x values <= 0 omitted from logarithmic plot
legend("topright", legend = c("posterior", "true", "MASS::glm.nb"),
col = c("black", "grey", "red"), lty = c(1, 2, 1), lwd = c(1, 1.3, 3),
bty = "n")
plot(x, y)
for(i in seq_along(e[[1]])) {
lines(x, exp(e[[1]][i] + e[[2]][i] * (x - mean(x))), col = "#00000008")
}