\(j=1,\ldots,J\) units
\(k=1,\ldots,K_j\) observations made on the \(j^{th}\) unit
\(p=1,\ldots,P\) covariates
\[y_{jk} \sim \operatorname{N}(\theta + \alpha_j + \boldsymbol{X}_{jk}^T \boldsymbol{\beta}, \sigma^2)\]
where \(\boldsymbol{X}_{jk}^T=[X_1, \ldots, X_P]\) and \(\boldsymbol{\beta}^T=[\beta_1, \ldots, \beta_P]\)
# rm(list=ls())
rm(list=ls()[-which(ls()=="stan_fit")])
set.seed(38929)
# Datagen -----------------------------------------------------------------
Var1 <- c(0.3, 0.4, 0.5)
Var1Inv <- 1/Var1
Var2 <- c(300, 400, 500)
Var2Inv <- (1/Var2) * 10^3
Var3 <- c(100, 140) / 100
Xmain <- expand.grid(Var3, Var1Inv, Var2Inv)
# 8 /w replicates, 4 /wout replicates
Xmain <- rbind(Xmain[c(5:8, 11:14),], Xmain[c(5:8, 11:14),], Xmain[15:18,])
colnames(Xmain) <- c("Var3", "Var1Inv", "Var2Inv")
nunits <- nrow(Xmain)
Xmain$ID <- 1:nunits
rownames(Xmain) <- Xmain$ID
# K=12 observations per unit
Xbigger <- do.call(rbind, replicate(12, Xmain, simplify=FALSE))
Xcenter <- data.frame(scale(Xbigger, center=TRUE, scale=FALSE))
Xobs <- model.matrix(~(Var3 + Var2Inv + Var1Inv)^3 +
+ I(Var1Inv^2) + I(Var2Inv^2), Xcenter)[,-1]
true_alpha <- sample(-5:5, nunits, replace=TRUE)
true_theta <- mean(true_alpha)
true_alpha <- true_alpha - true_theta
true_beta <- sample(-5:5, ncol(Xobs), replace=TRUE)
true_beta <- true_beta*10
true_mean <- true_theta + true_alpha + as.vector(Xobs %*% true_beta)
true_sigma <- 0.3
Yobs <- true_mean + rnorm(nrow(Xobs), 0, true_sigma)
# Model -------------------------------------------------------------------
model_code <- '
data {
int<lower=1> J;
int<lower=0> N;
int<lower=1> P;
matrix[N, P] x;
vector[N] y;
int IDs[N];
}
parameters {
real theta;
real<lower=0> tau_alpha;
vector[J] alpha;
vector[P] beta;
real<lower=0> sigma;
} model {
theta ~ normal(0, 50);
tau_alpha ~ normal(0, 10);
sigma ~ normal(0, 10);
alpha ~ normal(0, tau_alpha);
beta ~ normal(0, 50);
y ~ normal(theta + alpha[IDs] + x * beta, sigma);
}
'
model_dat <- list(x=Xobs, y=Yobs, N=nrow(Xobs), P=ncol(Xobs), J=nunits, IDs=Xbigger$ID)
stan_fit <- stan(model_code=model_code,
model_name="interactions_model",
# fit=stan_fit,
data=model_dat,
iter=3000,
chains=4,
thin=1,
warmup=1000,
seed=89)
##
## SAMPLING FOR MODEL 'interactions_model' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 1, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 1, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 1, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 1, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 1, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 1, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 1, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 1, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 1, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 1, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 1, Iteration: 3000 / 3000 [100%] (Sampling)
## Elapsed Time: 55.8391 seconds (Warm-up)
## 141.068 seconds (Sampling)
## 196.907 seconds (Total)
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 20: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## If the number in the 'count' column is small, do not ask about this message on stan-users.
##
## SAMPLING FOR MODEL 'interactions_model' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 2, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 2, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 2, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 2, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 2, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 2, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 2, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 2, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 2, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 2, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 2, Iteration: 3000 / 3000 [100%] (Sampling)
## Elapsed Time: 63.92 seconds (Warm-up)
## 141.084 seconds (Sampling)
## 205.004 seconds (Total)
##
##
## SAMPLING FOR MODEL 'interactions_model' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 3, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 3, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 3, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 3, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 3, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 3, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 3, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 3, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 3, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 3, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 3, Iteration: 3000 / 3000 [100%] (Sampling)
## Elapsed Time: 59.0773 seconds (Warm-up)
## 137.323 seconds (Sampling)
## 196.401 seconds (Total)
##
##
## SAMPLING FOR MODEL 'interactions_model' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 4, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 4, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 4, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 4, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 4, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 4, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 4, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 4, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 4, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 4, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 4, Iteration: 3000 / 3000 [100%] (Sampling)
## Elapsed Time: 55.1955 seconds (Warm-up)
## 127.853 seconds (Sampling)
## 183.049 seconds (Total)
saveRDS(stan_fit, file="stan_fit.rds")
stan_fit <- readRDS(file="stan_fit.rds")
stan_samps <- extract(stan_fit)
round(summary(stan_fit)$summary[,"Rhat"], 2)
## theta tau_alpha alpha[1] alpha[2] alpha[3] alpha[4] alpha[5]
## 1 1 1 1 1 1 1
## alpha[6] alpha[7] alpha[8] alpha[9] alpha[10] alpha[11] alpha[12]
## 1 1 1 1 1 1 1
## alpha[13] alpha[14] alpha[15] alpha[16] alpha[17] alpha[18] alpha[19]
## 1 1 1 1 1 1 1
## alpha[20] beta[1] beta[2] beta[3] beta[4] beta[5] beta[6]
## 1 1 1 1 1 1 1
## beta[7] beta[8] beta[9] sigma lp__
## 1 1 1 1 1
# Graphs ------------------------------------------------------------------
alpha_names <- names(stan_fit)[startsWith(names(stan_fit), "alpha")]
beta_names <- names(stan_fit)[startsWith(names(stan_fit), "beta")]
traceplot(stan_fit, alpha_names)
traceplot(stan_fit, beta_names)
traceplot(stan_fit, c("theta", "sigma", "tau_alpha"))
coef_plot <- function(samples_mat, true_values, main) {
quants <- apply(samples_mat, 2, quantile,
prob=c(0.025, 0.25, 0.5, 0.75, 0.975))
plot(true_values,
ylim=c(min(c(quants, true_values)), max(quants, true_values)),
type='n', ylab="", main=main)
nvars <- length(true_values)
arrows(x0=1:nvars, y0=quants["2.5%",], y1=quants["97.5%",],
angle=90, length=0.03, code=3, col="blue", lty=2)
arrows(x0=1:nvars, y0=quants["25%",], y1=quants["75%",],
angle=90, length=0, code=3, col="blue", lwd=3)
points(1:nvars, quants["50%",], pch=16, col="blue")
points(1:nvars, true_values, pch='x', col="red")
}
Coefficient plots showing posterior distributions of \(\alpha\) and \(\beta\). Dashed lines show 95% posterior credible interval. Solid lines show 50% posterior credible interval. Median shown as blue circle. True parameter values are overlayed as red x’s.
coef_plot(stan_samps$alpha, true_alpha, expression(alpha))
coef_plot(stan_samps$beta, true_beta, expression(beta))
Histograms of posterior distributions of Theta and Sigma. True values are indicated by vertical red bar.
hist(stan_samps$theta, main=expression(theta), xlab="")
abline(v=true_theta, col="red")
hist(stan_samps$sigma, main=expression(sigma), xlab="")
abline(v=true_sigma, col="red")