Simulate and fit negative binomial GLMs in Stan

Compile the model:

model <- "
// negative binomial parameterized as eta (log(mu)) and dispersion (phi)
// see p286 in stan-reference-2.4.0.pdf
// a basic GLM example
data {
  int<lower=1> N;    // rows of data
  vector[N] x;       // predictor
  int<lower=0> y[N]; // response
}
parameters {
  real<lower=0> phi; // neg. binomial dispersion parameter
  real b0;  // intercept
  real b1;  // slope
}
model {
  // priors:
  phi ~ cauchy(0, 3);
  b0 ~ normal(0, 5);
  b1 ~ normal(0, 5);
  // data model:
  y ~ neg_binomial_2_log(b0 + b1 * x, phi);
}
"
write(model, file = "negbin-glm.stan")

sm <- stan_model("negbin-glm.stan")

## 
## TRANSLATING MODEL 'negbin-glm' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'negbin-glm' NOW.

Simulate data:

set.seed(123)
N <- 70
phi <- 1.2
b0 <- -0.5
b1 <- 0.4
x <- seq(1, 10, length.out = N)
y <- rnbinom(N, size = phi, mu = exp(b0 + (x - mean(x)) * b1))
plot(x, y)

plot of chunk unnamed-chunk-4

Fit the model:

m <- sampling(sm, data = list(N = N, y = y, x = x - mean(x)),
  pars = c("b0", "b1", "phi"),
  iter = 500, chains = 4)

## Inference for Stan model: negbin-glm.
## 4 chains, each with iter=500; warmup=250; thin=1; 
## post-warmup draws per chain=250, total post-warmup draws=1000.
## 
##        mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
## b0    -0.88    0.01 0.26  -1.40  -1.03  -0.86  -0.70  -0.40   372 1.01
## b1     0.41    0.01 0.10   0.23   0.34   0.41   0.48   0.61   355 1.02
## phi    1.68    0.10 1.64   0.52   0.96   1.32   1.91   4.64   275 1.01
## lp__ -44.34    0.08 1.34 -47.72 -44.93 -43.96 -43.39 -42.81   294 1.00
## 
## Samples were drawn using NUTS(diag_e) at Sun Oct 19 00:55:57 2014.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Plot the output:

rstan::traceplot(m, inc_warmup = FALSE)

plot of chunk unnamed-chunk-7

e <- extract(m, pars = c("b0", "b1", "phi"))
true_pars <- c(b0 = b0, b1 = b1, phi = phi)
x_cent <- x - mean(x)
m_mass <- MASS::glm.nb(y ~ x_cent)
coefs_mass <- c(coef(m_mass), summary(m_mass)$theta)
par(mfrow = c(1, 3))
for(i in 1:3) {
  if(i %in% 1:2) {
    plot(density(e[[i]]), main = names(true_pars)[i])
  } else {
    plot(density(e[[i]]), main = names(true_pars)[i], log = "x")
  }
  abline(v = true_pars[i], lwd = 2, col = "grey", lty = 2)
  abline(v = coefs_mass[i], lwd = 3, col = "red")
}

## Warning: 3 x values <= 0 omitted from logarithmic plot

legend("topright", legend = c("posterior", "true", "MASS::glm.nb"),
  col = c("black", "grey", "red"), lty = c(1, 2, 1), lwd = c(1, 1.3, 3),
  bty = "n")

plot of chunk unnamed-chunk-7

plot(x, y)
for(i in seq_along(e[[1]])) {
  lines(x, exp(e[[1]][i] + e[[2]][i] * (x - mean(x))), col = "#00000008")
}

plot of chunk unnamed-chunk-8

Simulate and fit negative binomial GLMs in Stan

Sean Anderson

October 19, 2014