IPM lecture code
Introduction
Thank you for listening to my lecture!
This file contains everything you need to run the examples in Stan and to fit your first integrated model. Let me know if you have any questions!
Data and code
This example was inspired by a similar example (and dataset) found in the following book:
Kéry, M., & Kellner, K. F. (2024). Applied statistical modelling for ecologists: A practical guide to Bayesian and likelihood inference using R, JAGS, NIMBLE, Stan and TMB (1st edition). Elsevier.
I highly recommend this book for any ecologist!
The dataset in the example is not real; it was obtained by simulation. This is an ideal way to test new quantitative models, because it allows you to compare your estimates to the true parameter values! Here is the code I used to simulate the data:
set.seed(20)
# Simulate the three data sets
# Choose sample size and parameter values for all three data sets
nsites1 <- 500 # Sample size for count data
nsites2 <- 1000 # Sample size for zero-truncated counts
nsites3 <- 2000 # Sample size for detection/nondetection data
mean.lam <- 2 # Average expected abundance (lambda) per site
beta <- -2 # Coefficient of elevation covariate on lambda
truth <- c("log.lam" = log(mean.lam), "beta" = beta) # Save truth
# Simulate elevation covariate for all three data sets and scale it
# (center on 1000 m and divide by 1000 m)
elev1 <- sort(runif(nsites1, 200, 2000)) # Imagine 200–2000 m a.s.l.
elev2 <- sort(runif(nsites2, 200, 2000))
elev3 <- sort(runif(nsites3, 200, 2000))
selev1 <- (elev1 - 1000) / 1000 # Scaled elev1
selev2 <- (elev2 - 1000) / 1000 # Scaled elev2
selev3 <- (elev3 - 1000) / 1000 # Scaled elev3
# Create three regular count data sets with log-linear effects
C1 <- rpois(nsites1, exp(log(mean.lam) + beta * selev1))
C2 <- rpois(nsites2, exp(log(mean.lam) + beta * selev2))
C3 <- rpois(nsites3, exp(log(mean.lam) + beta * selev3))
table(C1) # Tabulate data set 1
# Create zero-truncated data set 2 (ztC2) by discarding all zeroes from C2
ztC2 <- C2 # Make a copy
ztC2 <- ztC2[ztC2 > 0] # Tossing out zeroes yields zero-truncated data
table(C2); table(ztC2) # Tabulate both original and zero-truncated data
# Turn count data set 3 (C3) into detection/nondetection data (y)
y <- C3 # Make a copy
y[y > 1] <- 1 # Squash counts to binary detection/nondetection
table(C3); table(y) # Tabulate both original counts and DND data
Note that I reformatted the data after simulating it, so the code above will not reproduce the exact data frames or CSV files provided to you.
In this document I use the data that I provided:
countdata1 <- read.csv("countdata1.csv")
countdata2 <- read.csv("countdata2.csv")
detdata <- read.csv("detection.csv")
These are also the datasets I used for the examples in the presentation.
Running the models in Stan
Model 1: Poisson GLM
The first model is a simple Poisson GLM, which is pretty easy to run in Stan. This is how I would do it, but feel free to modify it (be aware that I am more familiar with JAGS).
library(rstan)
bdata <- list(C1 = countdata1$counts, nsites1 = nrow(countdata1), selev1 = countdata1$selev)
cat(file = "model4.stan",
"data {
  int nsites1;
  array[nsites1] int C1;
  vector[nsites1] selev1;
}
parameters {
  real alpha;
  real beta;
}
model {
  vector[nsites1] lambda1;
  // Priors
  alpha ~ uniform(-10, 10);
  beta ~ normal(0, 100);
  // Likelihood
  for (i in 1:nsites1) {
    lambda1[i] = exp(alpha + beta * selev1[i]);
    C1[i] ~ poisson(lambda1[i]);
  }
}
generated quantities {
  real mean_lam = exp(alpha);
}
")
# Parameters monitored
params <- c("mean_lam", "alpha", "beta")
# HMC settings
ni <- 2000 ; nb <- 1000 ; nc <- 4 ; nt <- 1
# Call Stan, assess convergence and print results table
out4.stan <- stan(file = "model4.stan", data = bdata,
warmup = nb, iter = ni, chains = nc, thin = nt)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000166 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.66 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.478 seconds (Warm-up)
Chain 1: 0.407 seconds (Sampling)
Chain 1: 0.885 seconds (Total)
Chain 1:
(Near-identical sampling progress output for chains 2–4 omitted; each of those chains took about 0.9 seconds in total.)
rstan::traceplot(out4.stan)
print(out4.stan, dig = 3)
Inference for Stan model: anon_model.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
             mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff  Rhat
alpha       0.694   0.001 0.038   0.621   0.668   0.695   0.719   0.769  1317 1.003
beta       -1.961   0.002 0.069  -2.095  -2.007  -1.962  -1.914  -1.822  1368 1.002
mean_lam    2.003   0.002 0.075   1.861   1.951   2.003   2.052   2.158  1315 1.003
lp__      553.158   0.029 1.007 550.370 552.779 553.454 553.885 554.158  1210 1.006
Samples were drawn using NUTS(diag_e) at Thu Nov 21 00:53:23 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
stan_est <- summary(out4.stan)$summary[1:2, 1]
Model 2: Zero-truncated Poisson
It is very easy to fit zero-truncated models in Stan (or JAGS): we simply add the truncation syntax T[1, ] to the Poisson likelihood (the JAGS equivalent is T(1, )).
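To see what the truncation does: for a zero-truncated Poisson, the probability of a count y > 0 is the ordinary Poisson probability divided by Pr(Y > 0). Here is a minimal R sketch of that idea (my illustration, not part of the lecture code; the function name dztpois is made up):
# Zero-truncated Poisson pmf: renormalize the Poisson pmf over y > 0
dztpois <- function(y, lambda) {
  dpois(y, lambda) / (1 - ppois(0, lambda)) # divide by Pr(Y > 0)
}
sum(dztpois(1:100, lambda = 2)) # sums to ~1 over the positive counts
dztpois(1, 2) > dpois(1, 2) # TRUE: the mass at zero is redistributed
This is exactly the renormalized density used in the NLL2 function near the end of this document.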
bdata <- list(C2 = countdata2$counts, nsites2 = nrow(countdata2), selev2 = countdata2$selev)
cat(file = "model4.stan",
"data {
  int nsites2;
  array[nsites2] int C2;
  vector[nsites2] selev2;
}
parameters {
  real alpha;
  real beta;
}
model {
  vector[nsites2] lambda2;
  // Priors
  alpha ~ uniform(-10, 10);
  beta ~ normal(0, 100);
  // Likelihood
  for (j in 1:nsites2) {
    lambda2[j] = exp(alpha + beta * selev2[j]);
    C2[j] ~ poisson(lambda2[j]) T[1, ];
  }
}
generated quantities {
  real mean_lam = exp(alpha);
}
")
# Parameters monitored
params <- c("mean_lam", "alpha", "beta")
# HMC settings
ni <- 2000 ; nb <- 1000 ; nc <- 4 ; nt <- 1
# Call Stan, assess convergence and print results table
system.time(
out4.stan <- stan(file = "model4.stan", data = bdata,
warmup = nb, iter = ni, chains = nc, thin = nt) )
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000334 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.34 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 2.043 seconds (Warm-up)
Chain 1: 1.95 seconds (Sampling)
Chain 1: 3.993 seconds (Total)
Chain 1:
(Near-identical sampling progress output for chains 2–4 omitted; each of those chains took about 4 seconds in total.)
user system elapsed
16.31 0.08 57.53
rstan::traceplot(out4.stan)
print(out4.stan, dig = 3)
Inference for Stan model: anon_model.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
              mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
alpha        0.644   0.001 0.039    0.567    0.618    0.643    0.670    0.720  1243 1.000
beta        -2.038   0.002 0.071   -2.174   -2.086   -2.038   -1.991   -1.894  1321 1.000
mean_lam     1.905   0.002 0.074    1.763    1.855    1.903    1.953    2.054  1239 1.000
lp__      1418.915   0.027 0.992 1416.411 1418.509 1419.224 1419.634 1419.898  1357 1.001
Samples were drawn using NUTS(diag_e) at Thu Nov 21 00:54:22 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
stan_est <- summary(out4.stan)$summary[1:2, 1]
Model 3: Cloglog Bernoulli
The third model treats the detection/nondetection data as Bernoulli with a cloglog link. If site-level abundance is Poisson(lambda), the probability of detecting at least one individual is psi = 1 - exp(-lambda), and cloglog(psi) = log(lambda), so alpha and beta keep their interpretation on the log-abundance scale.
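A quick numerical check of that identity (a throwaway sketch, not part of the lecture code):
# If abundance is Poisson(lambda), detection probability is psi = 1 - exp(-lambda)
lambda <- 2
psi <- 1 - exp(-lambda)
log(-log(1 - psi)) # cloglog(psi) recovers log(lambda) ...
log(lambda) # ... = log(2) = 0.693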
bdata <- list(y = detdata$y, nsites3 = nrow(detdata), selev3 = detdata$selev)
cat(file = "model4.stan",
"data {
  int nsites3;
  array[nsites3] int y;
  vector[nsites3] selev3;
}
parameters {
  real alpha;
  real beta;
}
model {
  vector[nsites3] psi;
  // Priors
  alpha ~ uniform(-10, 10);
  beta ~ normal(0, 100);
  // Likelihood
  for (k in 1:nsites3) {
    psi[k] = inv_cloglog(alpha + beta * selev3[k]);
    y[k] ~ bernoulli(psi[k]);
  }
}
generated quantities {
  real mean_lam = exp(alpha);
}
")
# Parameters monitored
params <- c("mean_lam", "alpha", "beta")
# HMC settings
ni <- 2000 ; nb <- 1000 ; nc <- 4 ; nt <- 1
# Call Stan, assess convergence and print results table
system.time(
out4.stan <- stan(file = "model4.stan", data = bdata,
warmup = nb, iter = ni, chains = nc, thin = nt) )
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000703 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 7.03 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 3.504 seconds (Warm-up)
Chain 1: 3.114 seconds (Sampling)
Chain 1: 6.618 seconds (Total)
Chain 1:
(Near-identical sampling progress output for chains 2–4 omitted; each of those chains took about 6–7 seconds in total.)
user system elapsed
27.42 0.05 68.25
rstan::traceplot(out4.stan)
print(out4.stan, dig = 3)
Inference for Stan model: anon_model.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
              mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
alpha        0.714   0.001 0.043    0.632    0.684    0.713    0.743    0.799  1636 1.000
beta        -1.990   0.002 0.097   -2.183   -2.055   -1.992   -1.923   -1.798  1826 0.999
mean_lam     2.043   0.002 0.088    1.882    1.981    2.040    2.101    2.223  1627 1.000
lp__      -785.936   0.026 1.053 -788.732 -786.341 -785.609 -785.200 -784.932  1655 1.001
Samples were drawn using NUTS(diag_e) at Thu Nov 21 00:55:31 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
stan_est <- summary(out4.stan)$summary[1:2, 1]
Run a single integrated model using the three datasets
The integrated model fits all three datasets at once, with one alpha and one beta shared by the Poisson, zero-truncated Poisson, and cloglog Bernoulli likelihoods.
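Here is a minimal sketch of how I would write that integrated model in Stan, combining the three likelihood blocks from Models 1–3 with shared parameters. This is my own assembly rather than tested lecture code: the file name model_imdm.stan and object name out_joint are made up, the data names match the lists used above, and the HMC settings (ni, nb, nc, nt) are reused.
bdata <- list(C1 = countdata1$counts, nsites1 = nrow(countdata1), selev1 = countdata1$selev,
              C2 = countdata2$counts, nsites2 = nrow(countdata2), selev2 = countdata2$selev,
              y = detdata$y, nsites3 = nrow(detdata), selev3 = detdata$selev)
cat(file = "model_imdm.stan",
"data {
  int nsites1;
  array[nsites1] int C1;
  vector[nsites1] selev1;
  int nsites2;
  array[nsites2] int C2;
  vector[nsites2] selev2;
  int nsites3;
  array[nsites3] int y;
  vector[nsites3] selev3;
}
parameters {
  real alpha;
  real beta;
}
model {
  // Priors (same as in the single-dataset models)
  alpha ~ uniform(-10, 10);
  beta ~ normal(0, 100);
  // Likelihood 1: regular Poisson counts
  for (i in 1:nsites1) {
    C1[i] ~ poisson(exp(alpha + beta * selev1[i]));
  }
  // Likelihood 2: zero-truncated Poisson counts
  for (j in 1:nsites2) {
    C2[j] ~ poisson(exp(alpha + beta * selev2[j])) T[1, ];
  }
  // Likelihood 3: detection/nondetection with a cloglog link
  for (k in 1:nsites3) {
    y[k] ~ bernoulli(inv_cloglog(alpha + beta * selev3[k]));
  }
}
generated quantities {
  real mean_lam = exp(alpha);
}
")
out_joint <- stan(file = "model_imdm.stan", data = bdata,
                  warmup = nb, iter = ni, chains = nc, thin = nt)
print(out_joint, dig = 3)
Because the three likelihood contributions share alpha and beta, the joint posterior pools information across all three datasets, which is the whole point of the integrated model; the DIY joint likelihood NLLjoint below does the same thing by maximum likelihood.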
Other code used in slideshow
Here is all the code used in the slideshow, in case you find it useful:
library(ggplot2)
library(cowplot)
countdata1 <- read.csv("countdata1.csv")
countdata2 <- read.csv("countdata2.csv")
detdata <- read.csv("detection.csv")
ggplot(data=countdata1, aes(x=elevation, y=counts))+
geom_point()+
geom_smooth(method = glm, method.args = list(family = "poisson"))+
xlab("Elevation (m)")+
ylab("Counts")+
theme_cowplot()
summary(fm1 <- glm(counts ~ selev, data = countdata1, family = poisson(link = "log")))
library(ASMbook) # for get_MLE()
NLL1 <- function(param, y, x) {
  alpha <- param[1]
  beta <- param[2]
  lambda <- exp(alpha + beta * x)
  LL <- dpois(y, lambda, log = TRUE) # LL contribution for each datum
  return(-sum(LL)) # NLL for all data
}
# Minimize NLL
inits <- c(alpha = 0, beta = 0) # Need to provide initial values
sol1 <- optim(inits, NLL1, y = countdata1$counts, x = countdata1$selev, hessian = TRUE, method = 'BFGS')
tmp1 <- get_MLE(sol1, 4)
NLL2 <- function(param, y, x) {
  alpha <- param[1]
  beta <- param[2]
  lambda <- exp(alpha + beta * x)
  L <- dpois(y, lambda) / (1 - ppois(0, lambda)) # Likelihood contribution for each datum
  return(-sum(log(L))) # NLL for all data
}
# Minimize NLL
inits <- c(alpha = 0, beta = 0) # Need to provide initial values
sol2 <- optim(inits, NLL2, y = countdata2$counts, x = countdata2$selev, hessian = TRUE, method = 'BFGS')
tmp2 <- get_MLE(sol2, 4)
summary(fm3 <- glm(y ~ selev, data = detdata, family = binomial(link = "cloglog")))
NLL3 <- function(param, y, x) {
  alpha <- param[1]
  beta <- param[2]
  lambda <- exp(alpha + beta * x)
  psi <- 1 - exp(-lambda)
  LL <- dbinom(y, 1, psi, log = TRUE) # LL contribution for each datum
  return(-sum(LL)) # NLL for all data
}
# Minimize NLL
inits <- c(alpha = 0, beta = 0)
sol3 <- optim(inits, NLL3, y = detdata$y, x = detdata$selev, hessian = TRUE, method = 'BFGS')
tmp3 <- get_MLE(sol3, 4)
NLLjoint <- function(param, y1, x1, y2, x2, y3, x3) {
  # Definition of elements in param vector (shared between data sets)
  alpha <- param[1] # log-linear intercept
  beta <- param[2] # log-linear slope
  # Likelihood for the Poisson GLM for data set 1 (y1, x1)
  lambda1 <- exp(alpha + beta * x1)
  L1 <- dpois(y1, lambda1)
  # Likelihood for the ztPoisson GLM for data set 2 (y2, x2)
  lambda2 <- exp(alpha + beta * x2)
  L2 <- dpois(y2, lambda2) / (1 - ppois(0, lambda2))
  # Likelihood for the cloglog Bernoulli GLM for data set 3 (y3, x3)
  lambda3 <- exp(alpha + beta * x3)
  psi <- 1 - exp(-lambda3)
  L3 <- dbinom(y3, 1, psi)
  # Joint log-likelihood and joint NLL: here you can see that sum!
  JointLL <- sum(log(L1)) + sum(log(L2)) + sum(log(L3)) # Joint LL
  return(-JointLL) # Return joint NLL
}
# Minimize NLLjoint
inits <- c(alpha = 0, beta = 0)
solJoint <- optim(inits, NLLjoint, y1 = countdata1$counts, x1 = countdata1$selev,
                  y2 = countdata2$counts, x2 = countdata2$selev,
                  y3 = detdata$y, x3 = detdata$selev, hessian = TRUE, method = 'BFGS')
# Get MLE and asymptotic SE and print and save
(tmp4 <- get_MLE(solJoint, 4))
diy_est <- tmp4[,1]
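Finally, it can be instructive to line up the estimates against the truth saved during simulation. A minimal sketch, assuming the simulation code, the Stan fits, and the DIY joint likelihood above have all been run in the same session (stan_est here holds alpha and beta from the last Stan model fitted):
# Compare true values, Stan posterior means, and DIY MLEs (all on the log-lambda scale)
comp <- cbind(truth = truth, Stan = stan_est, DIY = diy_est)
print(comp, digits = 3)
All three columns should agree closely, because the data were simulated from the very model being fitted.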