Here are the scripts that were “live coded” during the tutorial, all relating to the fish capture re-capture problem.
n_draw <- 100000
# Defining and drawing from the prior distribution
prior_n_fish <- sample(20:250, n_draw, replace = TRUE)
# Defining the generative model
pick_fish <- function(n_fish) {
fish <- rep(0:1, c(n_fish - 20, 20))
sum(sample(fish, 20))
}
# Simulating the data
n_marked <- rep(NA, n_draw)
for(i in 1:n_draw) {
n_marked[i] <- pick_fish(prior_n_fish[i])
}
# Filtering out those parameter values that didn't result in the
# data that we actually observed
post_n_fish <- prior_n_fish[n_marked == 5]
hist(post_n_fish)
length(post_n_fish)
## [1] 8242
# The posterior distribution showing the probability of different number of fish
# (binning here in bins of 20 just make the graph easier to interpret)
barplot(table(cut(post_n_fish, seq(0, 250, 20))) / length(post_n_fish), col = "salmon")
n_draw <- 100000
prior_n_fish <- sample(20:250, n_draw, replace = TRUE)
# This is the only part of the code that has changed from the original version above.
pick_fish <- function(n_fish) {
fish <- rep(0:1, c(n_fish - 20, 20))
prob_pick <- ifelse(fish == 0, 1.0, 0.5)
sum(sample(fish, 20, prob = prob_pick))
}
n_marked <- rep(NA, n_draw)
for(i in 1:n_draw) {
n_marked[i] <- pick_fish(prior_n_fish[i])
}
post_n_fish <- prior_n_fish[n_marked == 5]
hist(post_n_fish)
length(post_n_fish)
## [1] 4211
barplot(table(cut(post_n_fish, seq(0, 250, 20))) / length(post_n_fish), col = "salmon")
n_draw <- 100000
# This is the only part of the code that has changed from the original version above.
prior_n_fish <- rnbinom(n_draw, mu = 200 - 20, size = 4) + 20
hist(prior_n_fish)
pick_fish <- function(n_fish) {
fish <- rep(0:1, c(n_fish - 20, 20))
prob_pick <- ifelse(fish == 0, 1.0, 0.5)
sum(sample(fish, 20, prob = prob_pick))
}
n_marked <- rep(NA, n_draw)
for(i in 1:n_draw) {
n_marked[i] <- pick_fish(prior_n_fish[i])
}
post_n_fish <- prior_n_fish[n_marked == 5]
hist(post_n_fish)
length(post_n_fish)
## [1] 1826
barplot(table(cut(post_n_fish, seq(0, 250, 20))) / length(post_n_fish), col = "salmon")
Here the dhyper distribution is used as it implements the same process as the fish picking model.
library(rjags)
data_list <- list(n_marked = 5)
model_string <- "
model {
n_fish_real ~ dunif(0, 250)
n_fish <- round(n_fish_real)
n_marked ~ dhyper(20, n_fish - 20, 20, 1)
}
"
jags_model <- jags.model(textConnection(model_string), data = data_list)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1
## Unobserved stochastic nodes: 1
## Total graph size: 18
##
## Initializing model
samples <- coda.samples(jags_model, c("n_fish"), n.iter = 3000)
plot(samples)
s_mat <- as.matrix(samples)
# The probability that there are more than a 100 fish
mean(s_mat[,"n_fish"] > 100)
## [1] 0.4933
# Samples are cool and useful, and make it really easy to "post-process" the
# result of the analysis!
s_mat1 <- s_mat
# Now rerun the model with a different data set (a different lake maybe...)
data_list <- list(n_marked = 10)
jags_model <- jags.model(textConnection(model_string), data = data_list)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1
## Unobserved stochastic nodes: 1
## Total graph size: 18
##
## Initializing model
samples <- coda.samples(jags_model, c("n_fish"), n.iter = 3000)
s_mat2 <- as.matrix(samples)
hist(s_mat1[,"n_fish"])
hist(s_mat2[,"n_fish"])
# We can now easily compare the samples, while still correctly retaining the
# uncertainty
hist(s_mat1[,"n_fish"] - s_mat2[,"n_fish"])
mean(s_mat1[,"n_fish"] > s_mat2[,"n_fish"])
## [1] 0.979