# these packages might need to be installed first...
library("R2jags")
library("ggplot2")
library("reshape2")
For this example, we use the same data as in the book, but store it in a data frame:
# prepare data ----
k1 <- 4
n1 <- 10
k2 <- 5
n2 <- 10
DATA <- data.frame(counts = c(k1, k2), num = c(n1, n2), condition = c("A", "B"))
head(DATA)
##   counts num condition
## 1      4  10         A
## 2      5  10         B
For simplicity, let’s write the JAGS model specification as a string, instead of in a separate file:
jags_model <- "
model{
  # likelihood
  for (n in 1:N) {
    k[n] ~ dbin(theta, num[n])
    # posterior predictive replicates
    k_rep[n] ~ dbin(theta, num[n])
  }
  # prior on the single rate theta
  theta ~ dbeta(1, 1)
}"
# function to run JAGS ----
run_model <- function(data = DATA, model = jags_model) {
  # data passed to JAGS
  jags_data <- with(data, {
    list(k   = counts,
         num = num,
         N   = length(counts))
  })
  # initial values
  inits <- function() {
    list(theta = rbeta(1, 1, 1))
  }
  # parameters to monitor
  parameters <- c("theta", "k_rep")
  fit <- jags(data = jags_data,
              inits = inits,
              parameters.to.save = parameters,
              model.file = textConnection(model),
              n.chains = 3,
              n.iter = 1000,
              n.burnin = 100,
              n.thin = 2,
              DIC = TRUE)
  return(fit)
}
# call run JAGS function ----
fit <- run_model(data = DATA, model = jags_model)
## module glm loaded
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 9
##
## Initializing model
print(fit)
## Inference for Bugs model at "5", fit using jags,
## 3 chains, each with 1000 iterations (first 100 discarded), n.thin = 2
## n.sims = 1350 iterations saved
##          mu.vect sd.vect  2.5%   25%   50%   75%  97.5%  Rhat n.eff
## k_rep[1]   4.522   1.877 1.000 3.000 4.000 6.000  8.000 1.001  1300
## k_rep[2]   4.521   1.874 1.000 3.000 5.000 6.000  8.000 1.001  1400
## theta      0.451   0.104 0.243 0.378 0.448 0.523  0.646 1.000  1400
## deviance   6.704   1.279 5.774 5.871 6.206 7.041 10.428 1.000  1400
##
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
##
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 0.8 and DIC = 7.5
## DIC is an estimate of expected predictive error (lower deviance is better).
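The same diagnostics can also be pulled out of the fitted object programmatically; a minimal sketch, assuming the usual R2jags BUGSoutput structure:
# Rhat and effective sample size for every monitored node
round(fit$BUGSoutput$summary[, c("Rhat", "n.eff")], 3)
all(fit$BUGSoutput$summary[, "Rhat"] < 1.1)   # crude convergence check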
# extract samples ----
theta <- fit$BUGSoutput$sims.list$theta
k_rep <- fit$BUGSoutput$sims.list$k_rep
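With the raw draws in hand, theta can be summarised directly; the numbers should agree closely with the printed table (and with the analytic Beta posterior above):
mean(theta)                        # posterior mean from the samples
quantile(theta, c(0.025, 0.975))   # 95% credible interval from the samples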
# plot predicted values ----
df_obs  <- data.frame(k1, k2)              # observed success counts
df_pred <- data.frame(k1 = k_rep[, 1],     # posterior predictive draws
                      k2 = k_rep[, 2])
# tabulate how often each (k1, k2) pair occurs and melt to long format
m <- melt(table(df_pred), value.name = "counts")
The call to theme_bw() gets rid of the grey background.
The most important piece of code is size = m[,'counts']/5: it scales each grey point by the number of posterior predictive samples that fall on that (k1, k2) combination.
p1 <- ggplot(m, aes(x = k1, y = k2)) +
  geom_point(shape = 19,
             size = m[,'counts']/5,
             alpha = 0.8,
             colour = "grey") +
  geom_point(data = df_obs, aes(x = k1, y = k2),
             size = 6,
             shape = 19,
             colour = "red") +
  xlab("Success count 1") +
  ylab("Success count 2") +
  theme_bw(base_size = 12, base_family = "Helvetica")
print(p1)
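As a rough numeric companion to the plot (this check is an addition to the original example, not part of it), you can ask how often the replicated data reproduce the observed pair exactly:
# proportion of posterior predictive draws equal to the observed (k1, k2)
mean(k_rep[, 1] == k1 & k_rep[, 2] == k2)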