Willow Tit

objetivos:

hacer una regresión logística en JAGS
incorporar la probabilidad de detección en modelos de ocupación

Vamos a trabajar con datos de presencia/ausencia de "Willow Tit" (o Carbonero Montano, Poecile montanus) en puntos de muestreo de Suiza (ver Royle y Dorazio capítulo 3).

Primero copiamos los datos:

y.1 <- c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 
    0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 
    0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 
    1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
    1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 
    0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 
    1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 
    1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
y.2 <- c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 
    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 
    0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, NA, 1, 1, 0, NA, 0, 0, 0, 1, 0, 0, 
    0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 
    1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
    0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
    1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 
    1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
y.3 <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 
    0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, NA, 1, 1, NA, 0, 1, 0, 0, 0, 1, 0, 1, 
    0, 1, 0, 1, NA, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, NA, 
    NA, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, NA, 1, 1, 0, NA, 0, 0, 0, 1, 
    0, 0, 1, 0, 1, 0, 0, 0, NA, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 
    0, 0, 1, 0, 0, 0, 0, 1, NA, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
    0, 0, 0, NA, 1, 0, NA, 1, 0, 1, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    NA, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, NA, 0, 0, 0, 0, 1, 0, 0, 
    0, 0, 1, 1, 0, 0, 0, 0, NA, 0, 0, 0, 0, NA, NA, NA, NA, 1, NA, NA, NA, 1, 
    1, 1, 1, 1, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA)
elev <- c(420, 450, 1050, 1110, 510, 630, 590, 530, 1140, 770, 1220, 460, 1010, 
    760, 1300, 1270, 380, 550, 390, 1380, 530, 1190, 1490, 920, 620, 540, 820, 
    1220, 1180, 730, 580, 490, 1270, 930, 510, 1830, 1940, 1090, 2010, 790, 
    1730, 1120, 620, 590, 1840, 930, 540, 420, 1400, 890, 1460, 1850, 1420, 
    600, 550, 750, 1790, 680, 990, 520, 760, 370, 2710, 650, 660, 730, 460, 
    360, 250, 1940, 2420, 1320, 790, 470, 410, 1940, 1570, 1880, 1290, 1340, 
    880, 710, 630, 450, 2360, 1650, 2050, 790, 730, 490, 840, 450, 1010, 1450, 
    850, 1240, 570, 1500, 1420, 780, 470, 2080, 2020, 1750, 1630, 640, 530, 
    660, 1360, 1290, 670, 480, 2000, 800, 1010, 680, 490, 400, 320, 1400, 590, 
    430, 410, 510, 1930, 1560, 700, 440, 480, 490, 1850, 490, 480, 440, 820, 
    670, 610, 1700, 900, 1250, 910, 540, 450, 470, 1880, 1400, 470, 830, 1950, 
    1660, 1210, 380, 1840, 950, 880, 480, 550, 550, 340, 550, 1320, 260, 2010, 
    1130, 1910, 1630, 1540, 1340, 1180, 830, 540, 1410, 1320, 890, 580, 500, 
    1220, 710, 1820, 520, 1890, 1650, 1390, 1390, 1290, 1810, 1130, 480, 1040, 
    410, 2030, 1880, 950, 450, 950, 1070, 1980, 1350, 540, 420, 1420, 2240, 
    1980, 2330, 2050, 1180, 2010, 2220, 2310, 2190, 1770, 1830, 1870, 1840, 
    2420, 1820, 1850, 2250, 2250, 2450, 2350, 2650, 2750, 2450, 1950, 2150, 
    1850, 2050, 1850, 2050, 2150, 2050, 1950, 1950, 2250, 2250, 2350)
forest <- c(3, 21, 32, 35, 2, 60, 5, 13, 50, 57, 84, 17, 58, 26, 32, 66, 45, 
    31, 8, 78, 37, 18, 54, 6, 3, 27, 56, 66, 43, 40, 5, 3, 52, 38, 49, 15, 62, 
    82, 0, 5, 58, 98, 33, 53, 17, 16, 60, 2, 52, 58, 50, 12, 29, 16, 83, 79, 
    51, 8, 45, 5, 64, 14, 0, 0, 30, 17, 65, 30, 0, 5, 0, 66, 8, 75, 68, 72, 
    95, 57, 64, 44, 44, 34, 21, 23, 1, 43, 30, 26, 7, 87, 49, 39, 93, 48, 11, 
    73, 12, 42, 26, 13, 2, 15, 54, 74, 45, 61, 33, 54, 69, 47, 5, 36, 60, 72, 
    29, 7, 7, 13, 2, 16, 62, 9, 37, 71, 41, 19, 24, 10, 0, 70, 45, 18, 20, 3, 
    20, 36, 19, 35, 87, 51, 21, 11, 29, 55, 32, 32, 3, 58, 36, 86, 75, 23, 0, 
    71, 69, 6, 67, 67, 42, 72, 79, 47, 26, 56, 18, 33, 21, 39, 69, 42, 98, 52, 
    90, 50, 54, 9, 51, 35, 57, 18, 12, 34, 68, 63, 48, 33, 90, 33, 37, 8, 36, 
    66, 56, 0, 41, 63, 0, 35, 14, 1, 27, 0, 6, 0, 1, 58, 9, 14, 2, 30, 93, 82, 
    81, 82, 0, 69, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 15, 0, 0, 2, 0, 0, 
    0, 0)

Cada valor en estos vectores representa un sitio de muestreo (hay 237). Cada sitio fue visitado hasta 3 veces y cada vez se anotó si la especie fue detectada o no. y.1 es la primera visita (NA si no se visitó ese sitio esa vez, 1 si se detectó la especie, y 0 si no observamos esa especie en esa visita).
y.2 es la segunda visita e y.3 es la tercer visita y se registra lo mismo que en y.1. Por otro lado, elev es la altitud (en metros sobre el nivel del mar) de cada sitio y forest es el porcentaje de cobertura de bosque de cada sitio.

El objetivo es relacionar la presencia de esta especie de pájaro con la cobertura de bosque y la altitud.

Centramos las dos variables continuas para que sean comparables y creamos una nueva variable, (el cuadrado de la altitud) para testear respuestas no lineales de la altitud

alti <- as.vector(scale(elev, center = TRUE))
bosq <- as.vector(scale(forest, center = TRUE))
alti2 <- alti * alti

ym <- as.matrix(cbind(y.1, y.2, y.3))  # juntamos las 3 variables de  las visitas y las llamamos 'ym'

J <- rowSums(!is.na(ym))  # Contamos cuantas veces visitamos cada sitio.  

M <- nrow(ym)  # Creamos un vector M para guardar el numero total de sitios visitados  
M  # deberia aparecer el valor 237 en la consola de R

## [1] 237

y <- rowSums(ym, na.rm = TRUE)  # contamos el numero de veces que encontramos a la especie en cada sitio.

Ahora vamos a hacer una regresión logística Bayesiana, asumiendo que no hay error de observación. Para eso, primero escribimos el modelo en lenguaje bugs, y lo guardamos como texto:

cat(file = "WillowBin.bug",
    "
    model { 
    for(i in 1:M){      
    logit(z[i]) <- b0 + b1*alti[i] + b2*alti2[i] + b3*bosq[i]              
    y[i] ~ dbin(z[i],J[i])  
    } 
    
    b0 ~ dnorm(0,.001) 
    b1 ~ dnorm(0,.001) 
    b2 ~ dnorm(0,.001) 
    b3 ~ dnorm(0,.001) 
    logit(psi0) <- b0 # aquí guardamos la pr de ocurrencia para condiciones promedio 
    }
    ")

Tenemos que armar una lista de los datos que le vamos a pasar al JAGS, escribir una función para los valores iniciales de las cadenas Markpvianas, definir la lista de parámetros que queremos guardar y los números de iteraciones, "thinning", burnin y cadenas.

data <- list("y", "J", "M", "bosq", "alti", "alti2")

inits <- function() {
    list(b0 = rnorm(1), b1 = rnorm(1), b2 = rnorm(1), b3 = rnorm(1))
}

parameters <- c("b0", "b1", "b2", "b3", "psi0")

ni <- 5000  # número de iteraciones
nt <- 4  # descartamos 'thin' algunos valores
nb <- 1000  # cuantas iteraciones usamos de 'burn in'
nc <- 3  # y cuantas cadenas corremos

Finalmente podemos llamar a jags...

library(jagsUI)

wb.sim <- jags(data, inits, parameters, "willowBin.bug", n.chains = nc, n.thin = nt, 
    n.iter = ni, n.burnin = nb)

## 
## Processing function input....... 
## 
## Done. 
##  
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 1993
## 
## Initializing model
## 
## Adaptive phase, 100 iterations x 3 chains 
## If no progress bar appears JAGS has decided not to adapt 
##  
## 
##  Burn-in phase, 1000 iterations x 3 chains 
##  
## 
## Sampling from joint posterior, 4000 iterations x 3 chains 
##  
## 
## Calculating statistics....... 
## 
## Done.

Siempre que ajustamos un modelo usando MCMC tenemos que asegurarnos que las cadenas hayan convergido y que el tamaño efectivo de las muestras de las posteriores sean razonables:

print(wb.sim)

## JAGS output for model 'willowBin.bug', generated by jagsUI.
## Estimates based on 3 chains of 5000 iterations,
## burn-in = 1000 iterations and thin rate = 4,
## yielding 3000 total samples from the joint posterior. 
## MCMC ran for 0.13 minutes at time 2015-09-30 00:01:30.
## 
##             mean    sd    2.5%     50%   97.5% overlap0 f  Rhat n.eff
## b0        -0.856 0.175  -1.184  -0.863  -0.510    FALSE 1 1.008   232
## b1         2.126 0.205   1.690   2.141   2.489    FALSE 1 1.039    94
## b2        -0.973 0.179  -1.344  -0.965  -0.635    FALSE 1 1.013   359
## b3         0.837 0.133   0.590   0.836   1.118    FALSE 1 1.017   157
## psi0       0.299 0.036   0.234   0.297   0.375    FALSE 1 1.008   231
## deviance 456.380 2.756 453.017 455.670 463.040    FALSE 1 1.026   123
## 
## Successful convergence based on Rhat values (all < 1.1). 
## Rhat is the potential scale reduction factor (at convergence, Rhat=1). 
## For each parameter, n.eff is a crude measure of effective sample size. 
## 
## overlap0 checks if 0 falls in the parameter's 95% credible interval.
## f is the proportion of the posterior with the same sign as the mean;
## i.e., our confidence that the parameter is positive or negative.
## 
## DIC info: (pD = var(deviance)/2) 
## pD = 3.7 and DIC = 460.118 
## DIC is an estimate of expected predictive error (lower is better).

Vemos que tanto la elevación como la cobertura de bosques tienen coeficientes positivos y distintos de cero mientras que la elevación al cuadrado tiene un coeficiente negativo (implica que hay un máximo de probabilidad de ocurrencia a elevaciones intermedias). Vemos también que la probabilidad de ocurrencia para condiciones promedio es de 0.297. Podemos graficar como cambian las probabilidades de ocurrencia predichas a partir de las posteriores de los coeficientes

op <- par(mfrow = c(1, 2), mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.3, 
    font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)
plot(elev, plogis(wb.sim$mean$b0 + wb.sim$mean$b1 * alti + wb.sim$mean$b2 * 
    alti2), xlab = "Elevation", ylab = "Probability of Occurrence")
plot(forest, plogis(wb.sim$mean$b0 + wb.sim$mean$b3 * bosq), xlab = "Forest Cover", 
    ylab = "Probability of Occurrence")

par(op)

Modelo de detección imperfecta

Si consideramos que es posible que no se detecte a la especie en un sitio aunque esté presente, podemos separar la presencia/ausencia "verdadera" de la "observada". Definimos entonces una variable oculta z como una variable de distribución Bernoulli (una Binomial con N=1) cuya probabilidad de éxito (presencia) depende de covariables como en el caso anterior. Luego conectamos esa variable oculta z con las observaciones através de una probabilidad de detección p. En el modelo bugs de abajo el truco es multiplicar a z por p y usar el resultado como probabilidad de éxito de la binomial que modela los datos. Cuando z vale 0, la probabilidad de éxito se hace 0 y tenemos un "cero verdadero" en los datos. Cuando z vale 1, entonces la probabilidad de éxito de la binomial de las observaciones está dada por la probabilidad de detección p y podemos tener falsos negativos.

cat(file = "willow.bug",
    "
    model { 
     for(i in 1:M){ 
       z[i] ~ dbin(psi[i],1)      
       logit(psi[i]) <- b0 + b1 * alti[i] + b2 * alti2[i] + b3 * bosq[i] 
       tmp[i] <- z[i] * p             
       y[i] ~ dbin(tmp[i], J[i])  
    } 
    p ~ dunif(0,1)                
    b0 ~ dnorm(0,.001) 
    b1 ~ dnorm(0,.001) 
    b2 ~ dnorm(0,.001) 
    b3 ~ dnorm(0,.001) 
    logit(psi0)<-b0
    }
    ")

Los datos que usamos para este modelo son los mismos que en el caso anterior. En la función inits tenemos que incluir valores iniciales para z y para p.

data <- list("y", "J", "M", "bosq", "alti", "alti2")
inits <- function() {
    list(z = rbinom(M, 1, 1), b0 = 4, p = runif(1), b1 = rnorm(1), b2 = rnorm(1), 
        b3 = rnorm(1))
}

parameters <- c("p", "b0", "b1", "b2", "b3", "psi0")
ni <- 5000  # número de iteraciones
nt <- 4  # descartamos 'thin' algunos valores
nb <- 1000  # cuantas iteraciones usamos de 'burn in'
nc <- 3  # y cuantas cadenas corremos

w.sim <- jags(data, inits, parameters, "willow.bug", n.chains = nc, n.thin = nt, 
    n.iter = ni, n.burnin = nb)

## 
## Processing function input....... 
## 
## Done. 
##  
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 2469
## 
## Initializing model
## 
## Adaptive phase, 100 iterations x 3 chains 
## If no progress bar appears JAGS has decided not to adapt 
##  
## 
##  Burn-in phase, 1000 iterations x 3 chains 
##  
## 
## Sampling from joint posterior, 4000 iterations x 3 chains 
##  
## 
## Calculating statistics....... 
## 
## Done.

print(w.sim)

## JAGS output for model 'willow.bug', generated by jagsUI.
## Estimates based on 3 chains of 5000 iterations,
## burn-in = 1000 iterations and thin rate = 4,
## yielding 3000 total samples from the joint posterior. 
## MCMC ran for 0.205 minutes at time 2015-09-30 00:01:38.
## 
##             mean     sd    2.5%     50%   97.5% overlap0    f  Rhat n.eff
## p          0.786  0.029   0.726   0.787   0.840    FALSE 1.00 1.000  2649
## b0        -0.203  0.283  -0.758  -0.204   0.359     TRUE 0.77 1.000  3000
## b1         2.070  0.320   1.486   2.057   2.732    FALSE 1.00 1.025    93
## b2        -1.152  0.285  -1.719  -1.144  -0.623    FALSE 1.00 1.010   225
## b3         0.870  0.244   0.407   0.862   1.368    FALSE 1.00 1.001  2171
## psi0       0.450  0.069   0.319   0.449   0.589    FALSE 1.00 1.000  3000
## deviance 173.372 10.610 160.647 170.801 199.093    FALSE 1.00 1.000  3000
## 
## Successful convergence based on Rhat values (all < 1.1). 
## Rhat is the potential scale reduction factor (at convergence, Rhat=1). 
## For each parameter, n.eff is a crude measure of effective sample size. 
## 
## overlap0 checks if 0 falls in the parameter's 95% credible interval.
## f is the proportion of the posterior with the same sign as the mean;
## i.e., our confidence that the parameter is positive or negative.
## 
## DIC info: (pD = var(deviance)/2) 
## pD = 56.3 and DIC = 229.673 
## DIC is an estimate of expected predictive error (lower is better).

Vemos que la probabilidad de detección es relativamente alta y que las variables ambientales tienen coeficientes similares a los de modelo anterior. La probabilidad de ocurrencia en condiciones promedio aumentó. Para graficar la posterior de la probabilidad de ocurrencia en condiciones promedio hacemos:

plot(density(w.sim$sims.list$psi0), main = "")

Podemos comparar gráficamente cómo los dos modelos producen distintos valores esparados de probabilidad de ocupación (el modelo de detección imperfecta está en rojo).

op <- par(mfrow = c(1, 2), mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.3, 
    font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)
plot(elev, plogis(wb.sim$mean$b0 + wb.sim$mean$b1 * alti + wb.sim$mean$b2 * 
    alti2), xlab = "Elevation", ylab = "Probability of Occurrence", ylim = c(0, 
    1))
points(elev, plogis(w.sim$mean$b0 + w.sim$mean$b1 * alti + w.sim$mean$b2 * alti2), 
    xlab = "Elevation", ylab = "Probability of Occurrence", col = 2)
plot(forest, plogis(wb.sim$mean$b0 + wb.sim$mean$b3 * bosq), xlab = "Forest Cover", 
    ylab = "Probability of Occurrence", ylim = c(0, 1))
points(forest, plogis(w.sim$mean$b0 + w.sim$mean$b3 * bosq), xlab = "Forest Cover", 
    ylab = "Probability of Occurrence", col = 2)

par(op)

sessionInfo()

## R version 3.2.2 (2015-08-14)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] jagsUI_1.3.7    lattice_0.20-33 knitr_1.11     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.8         mime_0.4             grid_3.2.2          
##  [4] knitrBootstrap_1.0.0 formatR_1.2          magrittr_1.5        
##  [7] evaluate_0.7.2       coda_0.17-1          stringi_0.5-5       
## [10] rjags_3-15           rmarkdown_0.8        tools_3.2.2         
## [13] stringr_1.0.0        markdown_0.7.7       parallel_3.2.2      
## [16] yaml_2.1.13          htmltools_0.2.6

Juan Manuel Morales. 6 de Septiembre del 2015. Última actualización: 2015-09-30