## R code 12.1
library(rethinking)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: ggplot2
## rstan (Version 2.8.0, packaged: 2015-09-19 14:48:38 UTC, GitRev: 05c3d0058b6a)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.57)
data(reedfrogs)
d <- reedfrogs
head(d)
##   density pred  size surv propsurv
## 1      10   no   big    9      0.9
## 2      10   no   big   10      1.0
## 3      10   no   big    7      0.7
## 4      10   no   big   10      1.0
## 5      10   no small    9      0.9
## 6      10   no small    9      0.9
str(d)
## 'data.frame':    48 obs. of  5 variables:
##  $ density : int  10 10 10 10 10 10 10 10 10 10 ...
##  $ pred    : Factor w/ 2 levels "no","pred": 1 1 1 1 1 1 1 1 2 2 ...
##  $ size    : Factor w/ 2 levels "big","small": 1 1 1 1 2 2 2 2 1 1 ...
##  $ surv    : int  9 10 7 10 9 9 10 9 4 9 ...
##  $ propsurv: num  0.9 1 0.7 1 0.9 0.9 1 0.9 0.4 0.9 ...
## R code 12.2
library(rethinking)
data(reedfrogs)
d <- reedfrogs

# make the tank cluster variable
d$tank <- 1:nrow(d)
head(d)
##   density pred  size surv propsurv tank
## 1      10   no   big    9      0.9    1
## 2      10   no   big   10      1.0    2
## 3      10   no   big    7      0.7    3
## 4      10   no   big   10      1.0    4
## 5      10   no small    9      0.9    5
## 6      10   no small    9      0.9    6
# fit
m12.1 <- map2stan(
  alist(
    surv ~ dbinom( density , p ) ,
    logit(p) <- a_tank[tank] ,
    a_tank[tank] ~ dnorm( 0 , 5 )
  ),
  data=d )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
## #  Elapsed Time: 0.316 seconds (Warm-up)
## #                0.36 seconds (Sampling)
## #                0.676 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
## R code 12.3
m12.2 <- map2stan(
  alist(
    surv ~ dbinom( density , p ) ,
    logit(p) <- a_tank[tank] ,
    a_tank[tank] ~ dnorm( a , sigma ) ,
    a ~ dnorm(0,1) ,
    sigma ~ dcauchy(0,1)
  ), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 1, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 1, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 1, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 1, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 1, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 1, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 1, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 1, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 1, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 1, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 1, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.532 seconds (Warm-up)
## #                0.465 seconds (Sampling)
## #                0.997 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 2, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 2, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 2, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 2, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 2, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 2, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 2, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 2, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 2, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 2, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 2, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.528 seconds (Warm-up)
## #                0.462 seconds (Sampling)
## #                0.99 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 3, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 3, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 3, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 3, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 3, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 3, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 3, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 3, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 3, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 3, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 3, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.538 seconds (Warm-up)
## #                0.453 seconds (Sampling)
## #                0.991 seconds (Total)
## 
## [1] "The following numerical problems occured the indicated number of times on chain 3"
##                                                                                 count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0!     1
## [1] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"
## [2] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."                              
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 4, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 4, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 4, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 4, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 4, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 4, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 4, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 4, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 4, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 4, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 4, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.532 seconds (Warm-up)
## #                0.46 seconds (Sampling)
## #                0.992 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'surv ~ dbinom(density, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0.01 seconds (Sampling)
## #                0.01 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 800 / 8000 ]
[ 1600 / 8000 ]
[ 2400 / 8000 ]
[ 3200 / 8000 ]
[ 4000 / 8000 ]
[ 4800 / 8000 ]
[ 5600 / 8000 ]
[ 6400 / 8000 ]
[ 7200 / 8000 ]
[ 8000 / 8000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
WAIC(m12.2)
## [1] 1010.173
## attr(,"lppd")
## [1] -467.0945
## attr(,"pWAIC")
## [1] 37.99217
## attr(,"se")
## [1] 37.96536
## R code 12.4
compare( m12.1 , m12.2 )
##         WAIC pWAIC dWAIC weight    SE  dSE
## m12.2 1010.2  38.0   0.0      1 37.97   NA
## m12.1 1025.0  50.2  14.9      0 43.00 6.65
## R code 12.5
# extract Stan samples
post <- extract.samples(m12.2)

# compute median intercept for each tank
# also transform to probability with logistic
d$propsurv.est <- logistic( apply( post$a_tank , 2 , median ) )

# display raw proportions surviving in each tank
plot( d$propsurv , ylim=c(0,1) , pch=16 , xaxt="n" ,
      xlab="tank" , ylab="proportion survival" , col=rangi2 )
axis( 1 , at=c(1,16,32,48) , labels=c(1,16,32,48) )

# overlay posterior medians
points( d$propsurv.est )

# mark posterior median probability across tanks
abline( h=logistic(median(post$a)) , lty=2 )

# draw vertical dividers between tank densities
abline( v=16.5 , lwd=0.5 )
abline( v=32.5 , lwd=0.5 )
text( 8 , 0 , "small tanks" )
text( 16+8 , 0 , "medium tanks" )
text( 32+8 , 0 , "large tanks" )

## R code 12.6
# show first 100 populations in the posterior
plot( NULL , xlim=c(-3,4) , ylim=c(0,0.35) ,
      xlab="log-odds survive" , ylab="Density" )
for ( i in 1:100 )
  curve( dnorm(x,post$a[i],post$sigma[i]) , add=TRUE ,
         col=col.alpha("black",0.2) )

# sample 8-thousand imaginary tanks from the posterior distribution
sim_tanks <- rnorm( 8000 , post$a , post$sigma )

# transform to probability and visualize
dens( logistic(sim_tanks) , xlab="probability survive" )

## R code 12.7
a <- 1.4
sigma <- 1.5
nponds <- 60
ni <- as.integer( rep( c(5,10,25,35) , each=15 ) )

## R code 12.8
a_pond <- rnorm( nponds , mean=a , sd=sigma )

## R code 12.9
dsim <- data.frame( pond=1:nponds , ni=ni , true_a=a_pond )

## R code 12.10
class(1:3)
## [1] "integer"
class(c(1,2,3))
## [1] "numeric"
## R code 12.11
dsim$si <- rbinom( nponds , prob=logistic(dsim$true_a) , size=dsim$ni )

## R code 12.12
dsim$p_nopool <- dsim$si / dsim$ni

## R code 12.13
m12.3 <- map2stan(
  alist(
    si ~ dbinom( ni , p ),
    logit(p) <- a_pond[pond],
    a_pond[pond] ~ dnorm( a , sigma ),
    a ~ dnorm(0,1),
    sigma ~ dcauchy(0,1)
  ),
  data=dsim , iter=1e4 , warmup=1000 )
## 
## SAMPLING FOR MODEL 'si ~ dbinom(ni, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 1001 / 10000 [ 10%]  (Sampling)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Sampling)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
## #  Elapsed Time: 0.412 seconds (Warm-up)
## #                2.591 seconds (Sampling)
## #                3.003 seconds (Total)
## 
## [1] "The following numerical problems occured the indicated number of times on chain 1"
##                                                                                 count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0!     1
## [1] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"
## [2] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."                              
## 
## SAMPLING FOR MODEL 'si ~ dbinom(ni, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 900 / 9000 ]
[ 1800 / 9000 ]
[ 2700 / 9000 ]
[ 3600 / 9000 ]
[ 4500 / 9000 ]
[ 5400 / 9000 ]
[ 6300 / 9000 ]
[ 7200 / 9000 ]
[ 8100 / 9000 ]
[ 9000 / 9000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
## R code 12.14
precis(m12.3,depth=2)
##             Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a_pond[1]   1.32   0.88      -0.16       2.64  7412    1
## a_pond[2]   2.17   1.02       0.58       3.80  9000    1
## a_pond[3]   0.65   0.80      -0.71       1.83  8603    1
## a_pond[4]   0.03   0.78      -1.17       1.30  9000    1
## a_pond[5]   2.19   1.02       0.59       3.80  9000    1
## a_pond[6]   2.18   0.99       0.59       3.71  9000    1
## a_pond[7]   2.21   1.02       0.51       3.68  9000    1
## a_pond[8]   1.35   0.87       0.01       2.78  9000    1
## a_pond[9]   0.63   0.80      -0.65       1.91  9000    1
## a_pond[10]  1.34   0.87      -0.07       2.70  9000    1
## a_pond[11]  0.03   0.78      -1.22       1.29  8675    1
## a_pond[12]  1.33   0.88      -0.09       2.69  9000    1
## a_pond[13]  1.33   0.87      -0.01       2.77  9000    1
## a_pond[14] -1.29   0.94      -2.76       0.17  5703    1
## a_pond[15]  0.05   0.78      -1.17       1.28  9000    1
## a_pond[16]  1.38   0.70       0.31       2.52  8293    1
## a_pond[17]  1.38   0.69       0.27       2.46  9000    1
## a_pond[18]  1.91   0.78       0.64       3.08  9000    1
## a_pond[19] -0.17   0.59      -1.09       0.79  9000    1
## a_pond[20]  1.38   0.69       0.25       2.45  9000    1
## a_pond[21]  1.92   0.78       0.67       3.13  7406    1
## a_pond[22] -0.89   0.64      -1.88       0.16  9000    1
## a_pond[23]  1.39   0.70       0.26       2.48  9000    1
## a_pond[24]  2.61   0.93       1.11       4.01  9000    1
## a_pond[25] -0.89   0.65      -1.92       0.13  6105    1
## a_pond[26]  1.90   0.78       0.66       3.11  9000    1
## a_pond[27]  1.38   0.70       0.32       2.49  8111    1
## a_pond[28]  0.19   0.59      -0.76       1.10  9000    1
## a_pond[29]  1.91   0.78       0.71       3.18  7363    1
## a_pond[30] -0.16   0.60      -1.10       0.79  8596    1
## a_pond[31]  0.16   0.40      -0.46       0.79  9000    1
## a_pond[32] -1.87   0.56      -2.68      -0.94  9000    1
## a_pond[33]  3.22   0.87       1.80       4.48  5579    1
## a_pond[34]  0.46   0.40      -0.15       1.13  9000    1
## a_pond[35]  0.63   0.40      -0.04       1.24  9000    1
## a_pond[36]  2.23   0.61       1.24       3.15  9000    1
## a_pond[37] -1.88   0.56      -2.71      -0.95  9000    1
## a_pond[38]  1.40   0.48       0.64       2.17  9000    1
## a_pond[39]  2.24   0.61       1.25       3.16  9000    1
## a_pond[40]  0.64   0.41      -0.02       1.29  7773    1
## a_pond[41]  0.47   0.40      -0.19       1.09  9000    1
## a_pond[42] -0.14   0.40      -0.76       0.51  9000    1
## a_pond[43]  1.63   0.50       0.81       2.40  9000    1
## a_pond[44]  1.39   0.47       0.64       2.14  9000    1
## a_pond[45]  2.24   0.61       1.28       3.17  9000    1
## a_pond[46]  1.09   0.38       0.50       1.71  9000    1
## a_pond[47]  1.56   0.43       0.86       2.20  9000    1
## a_pond[48]  1.23   0.39       0.62       1.85  9000    1
## a_pond[49]  3.46   0.83       2.12       4.70  9000    1
## a_pond[50]  1.57   0.43       0.88       2.25  9000    1
## a_pond[51] -0.32   0.33      -0.87       0.20  9000    1
## a_pond[52]  0.34   0.34      -0.19       0.88  9000    1
## a_pond[53]  2.23   0.53       1.35       3.02  7770    1
## a_pond[54]  0.12   0.33      -0.39       0.66  9000    1
## a_pond[55]  1.56   0.43       0.87       2.22  7514    1
## a_pond[56] -0.68   0.35      -1.21      -0.12  9000    1
## a_pond[57]  2.22   0.53       1.33       3.00  8364    1
## a_pond[58] -0.56   0.34      -1.10      -0.02  9000    1
## a_pond[59]  1.76   0.46       1.04       2.49  9000    1
## a_pond[60]  1.76   0.45       1.06       2.48  9000    1
## a           0.98   0.20       0.67       1.30  7125    1
## sigma       1.35   0.18       1.07       1.65  3376    1
## R code 12.15
estimated.a_pond <- as.numeric( coef(m12.3)[1:60] )
dsim$p_partpool <- logistic( estimated.a_pond )

## R code 12.16
dsim$p_true <- logistic( dsim$true_a )

## R code 12.17
nopool_error <- abs( dsim$p_nopool - dsim$p_true )
partpool_error <- abs( dsim$p_partpool - dsim$p_true )

## R code 12.18
plot( 1:60 , nopool_error , xlab="pond" , ylab="absolute error" ,
      col=rangi2 , pch=16 )
points( 1:60 , partpool_error )

## R code 12.19
a <- 1.4
sigma <- 1.5
nponds <- 60
ni <- as.integer( rep( c(5,10,25,35) , each=15 ) )
a_pond <- rnorm( nponds , mean=a , sd=sigma )
dsim <- data.frame( pond=1:nponds , ni=ni , true_a=a_pond )
dsim$si <- rbinom( nponds,prob=logistic( dsim$true_a ),size=dsim$ni )
dsim$p_nopool <- dsim$si / dsim$ni
newdat <- list(si=dsim$si,ni=dsim$ni,pond=1:nponds)
m12.3new <- map2stan( m12.3 , data=newdat , iter=1e4 , warmup=1000 )
## 
## SAMPLING FOR MODEL 'si ~ dbinom(ni, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 1001 / 10000 [ 10%]  (Sampling)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Sampling)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
## #  Elapsed Time: 0.425 seconds (Warm-up)
## #                2.731 seconds (Sampling)
## #                3.156 seconds (Total)
## 
## [1] "The following numerical problems occured the indicated number of times on chain 1"
##                                                                                 count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0!     1
## [1] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"
## [2] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."                              
## 
## SAMPLING FOR MODEL 'si ~ dbinom(ni, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0.01 seconds (Sampling)
## #                0.01 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 900 / 9000 ]
[ 1800 / 9000 ]
[ 2700 / 9000 ]
[ 3600 / 9000 ]
[ 4500 / 9000 ]
[ 5400 / 9000 ]
[ 6300 / 9000 ]
[ 7200 / 9000 ]
[ 8100 / 9000 ]
[ 9000 / 9000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
## R code 12.20
y1 <- rnorm( 1e4 , 10 , 1 )
y2 <- 10 + rnorm( 1e4 , 0 , 1 )
## R code 12.21
library(rethinking)
data(chimpanzees)
d <- chimpanzees
d$recipient <- NULL     # get rid of NAs
head(d)
##   actor condition block trial prosoc_left chose_prosoc pulled_left
## 1     1         0     1     2           0            1           0
## 2     1         0     1     4           0            0           1
## 3     1         0     1     6           1            0           0
## 4     1         0     1     8           0            1           0
## 5     1         0     1    10           1            1           1
## 6     1         0     1    12           1            1           1
str(d)
## 'data.frame':    504 obs. of  7 variables:
##  $ actor       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ condition   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block       : int  1 1 1 1 1 1 2 2 2 2 ...
##  $ trial       : int  2 4 6 8 10 12 14 16 18 20 ...
##  $ prosoc_left : int  0 0 1 0 1 1 1 1 0 0 ...
##  $ chose_prosoc: int  1 0 0 1 1 1 0 0 1 1 ...
##  $ pulled_left : int  0 1 0 0 1 1 0 0 0 0 ...
m12.4 <- map2stan(
  alist(
    pulled_left ~ dbinom( 1 , p ) ,
    logit(p) <- a + a_actor[actor] + (bp + bpC*condition)*prosoc_left ,
    a_actor[actor] ~ dnorm( 0 , sigma_actor ),
    a ~ dnorm(0,10),
    bp ~ dnorm(0,10),
    bpC ~ dnorm(0,10),
    sigma_actor ~ dcauchy(0,1)
  ) ,
  data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
## Precompiling model...
## the number of chains is less than 1; sampling not done
## Dispatching 4 chains to 3 cores.
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 1600 / 16000 ]
[ 3200 / 16000 ]
[ 4800 / 16000 ]
[ 6400 / 16000 ]
[ 8000 / 16000 ]
[ 9600 / 16000 ]
[ 11200 / 16000 ]
[ 12800 / 16000 ]
[ 14400 / 16000 ]
[ 16000 / 16000 ]
## R code 12.22
post <- extract.samples(m12.4)
total_a_actor <- sapply( 1:7 , function(actor) post$a + post$a_actor[,actor] )
round( apply(total_a_actor,2,mean) , 2 )
## [1] -0.71  4.64 -1.02 -1.02 -0.71  0.23  1.77
## R code 12.23
# prep data
d$block_id <- d$block  # name 'block' is reserved by Stan

m12.5 <- map2stan(
  alist(
    pulled_left ~ dbinom( 1 , p ),
    logit(p) <- a + a_actor[actor] + a_block[block_id] +
      (bp + bpc*condition)*prosoc_left,
    a_actor[actor] ~ dnorm( 0 , sigma_actor ),
    a_block[block_id] ~ dnorm( 0 , sigma_block ),
    c(a,bp,bpc) ~ dnorm(0,10),
    sigma_actor ~ dcauchy(0,1),
    sigma_block ~ dcauchy(0,1)
  ) ,
  data=d, warmup=1000 , iter=6000 , chains=4 , cores=3 )
## Precompiling model...
## the number of chains is less than 1; sampling not done
## Dispatching 4 chains to 3 cores.
## Warning: 使われていないコネクション 18 (<-kuma:11633) を閉じます
## Warning: 使われていないコネクション 17 (<-kuma:11633) を閉じます
## Warning: 使われていないコネクション 16 (<-kuma:11633) を閉じます
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 2000 / 20000 ]
[ 4000 / 20000 ]
[ 6000 / 20000 ]
[ 8000 / 20000 ]
[ 10000 / 20000 ]
[ 12000 / 20000 ]
[ 14000 / 20000 ]
[ 16000 / 20000 ]
[ 18000 / 20000 ]
[ 20000 / 20000 ]
## Warning in map2stan(alist(pulled_left ~ dbinom(1, p), logit(p) <- a + a_actor[actor] + : There were 37 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
## R code 12.24
precis(m12.5,depth=2) # depth=2 displays varying effects
## Warning in precis(m12.5, depth = 2): There were 37 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
##              Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a_actor[1]  -1.13   0.99      -2.69       0.29  2565    1
## a_actor[2]   4.21   1.66       1.85       6.50  4052    1
## a_actor[3]  -1.43   0.99      -2.91       0.05  2509    1
## a_actor[4]  -1.44   0.99      -2.94       0.05  2585    1
## a_actor[5]  -1.13   0.99      -2.66       0.34  2538    1
## a_actor[6]  -0.17   1.00      -1.65       1.33  2443    1
## a_actor[7]   1.36   1.02      -0.19       2.90  2729    1
## a_block[1]  -0.18   0.23      -0.56       0.11  3248    1
## a_block[2]   0.04   0.20      -0.25       0.35  7965    1
## a_block[3]   0.06   0.20      -0.23       0.37  7477    1
## a_block[4]   0.01   0.19      -0.28       0.32  9286    1
## a_block[5]  -0.03   0.19      -0.33       0.25  8078    1
## a_block[6]   0.12   0.21      -0.17       0.46  5086    1
## a            0.41   0.98      -1.03       1.91  2431    1
## bp           0.83   0.27       0.36       1.24  1393    1
## bpc         -0.14   0.30      -0.60       0.34  7607    1
## sigma_actor  2.28   0.96       1.04       3.41  3957    1
## sigma_block  0.23   0.19       0.01       0.45  1863    1
plot(precis(m12.5,depth=2)) # also plot

## R code 12.25
post <- extract.samples(m12.5)
dens( post$sigma_block , xlab="sigma" , xlim=c(0,4) )
dens( post$sigma_actor , col=rangi2 , lwd=2 , add=TRUE )
text( 2 , 0.85 , "actor" , col=rangi2 )
text( 0.75 , 2 , "block" )

## R code 12.26
compare(m12.4,m12.5)
##        WAIC pWAIC dWAIC weight    SE  dSE
## m12.4 531.4   8.1   0.0   0.67 19.48   NA
## m12.5 532.8  10.5   1.4   0.33 19.69 1.82
## R code 12.27
chimp <- 2
d.pred <- list(
  prosoc_left = c(0,1,0,1),   # right/left/right/left
  condition = c(0,0,1,1),     # control/control/partner/partner
  actor = rep(chimp,4)
)
link.m12.4 <- link( m12.4 , data=d.pred )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
pred.p <- apply( link.m12.4 , 2 , mean )
pred.p.PI <- apply( link.m12.4 , 2 , PI )

## R code 12.28
post <- extract.samples(m12.4)
str(post)
## List of 5
##  $ a_actor    : num [1:16000, 1:7] -1.116 0.432 -0.323 -1.046 -1.432 ...
##  $ a          : num [1:16000(1d)] 0.257 -1.202 -0.205 0.228 0.79 ...
##  $ bp         : num [1:16000(1d)] 0.909 0.934 0.657 0.807 0.486 ...
##  $ bpC        : num [1:16000(1d)] -0.373 -0.611 0.239 -0.185 0.159 ...
##  $ sigma_actor: num [1:16000(1d)] 1.46 2.74 1.91 1.01 1.11 ...
## R code 12.29
dens( post$a_actor[,5] )

## R code 12.30
p.link <- function( prosoc_left , condition , actor ) {
  logodds <- with( post ,
                   a + a_actor[,actor] + (bp + bpC * condition) * prosoc_left
  )
  return( logistic(logodds) )
}

## R code 12.31
prosoc_left <- c(0,1,0,1)
condition <- c(0,0,1,1)
pred.raw <- sapply( 1:4 , function(i) p.link( prosoc_left[i] , condition[i] , 2 ) )
pred.p <- apply( pred.raw , 2 , mean )
pred.p.PI <- apply( pred.raw , 2 , PI )

## R code 12.32
d.pred <- list(
  prosoc_left = c(0,1,0,1),   # right/left/right/left
  condition = c(0,0,1,1),     # control/control/partner/partner
  actor = rep(2,4) )          # placeholder

## R code 12.33
# replace varying intercept samples with zeros
# 1000 samples by 7 actors
a_actor_zeros <- matrix(0,1000,7)

## R code 12.34
# fire up link
# note use of replace list
link.m12.4 <- link( m12.4 , n=1000 , data=d.pred ,
                    replace=list(a_actor=a_actor_zeros) )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
# summarize and plot
pred.p.mean <- apply( link.m12.4 , 2 , mean )
pred.p.PI <- apply( link.m12.4 , 2 , PI , prob=0.8 )
plot( 0 , 0 , type="n" , xlab="prosoc_left/condition" ,
      ylab="proportion pulled left" , ylim=c(0,1) , xaxt="n" ,
      xlim=c(1,4) )
axis( 1 , at=1:4 , labels=c("0/0","1/0","0/1","1/1") )
lines( 1:4 , pred.p.mean )
shade( pred.p.PI , 1:4 )

## R code 12.35
# replace varying intercept samples with simulations
post <- extract.samples(m12.4)
a_actor_sims <- rnorm(7000,0,post$sigma_actor)
a_actor_sims <- matrix(a_actor_sims,1000,7)

## R code 12.36
link.m12.4 <- link( m12.4 , n=1000 , data=d.pred ,
                    replace=list(a_actor=a_actor_sims) )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## R code 12.37
post <- extract.samples(m12.4)
sim.actor <- function(i) {
  sim_a_actor <- rnorm( 1 , 0 , post$sigma_actor[i] )
  P <- c(0,1,0,1)
  C <- c(0,0,1,1)
  p <- logistic(
    post$a[i] +
      sim_a_actor +
      (post$bp[i] + post$bpC[i]*C)*P
  )
  return(p)
}

## R code 12.38
# empty plot
plot( 0 , 0 , type="n" , xlab="prosoc_left/condition" ,
      ylab="proportion pulled left" , ylim=c(0,1) , xaxt="n" , xlim=c(1,4) )
axis( 1 , at=1:4 , labels=c("0/0","1/0","0/1","1/1") )

# plot 50 simulated actors
for ( i in 1:50 ) lines( 1:4 , sim.actor(i) , col=col.alpha("black",0.5) )

## R code 12.39
# prep data
library(rethinking)
data(Kline)
d <- Kline
d$logpop <- log(d$population)
d$society <- 1:10
head(d)
##      culture population contact total_tools mean_TU   logpop society
## 1   Malekula       1100     low          13     3.2 7.003065       1
## 2    Tikopia       1500     low          22     4.7 7.313220       2
## 3 Santa Cruz       3600     low          24     4.0 8.188689       3
## 4        Yap       4791    high          43     5.0 8.474494       4
## 5   Lau Fiji       7400    high          33     5.0 8.909235       5
## 6  Trobriand       8000    high          19     4.0 8.987197       6
# fit model
m12.6 <- map2stan(
  alist(
    total_tools ~ dpois(mu),
    log(mu) <- a + a_society[society] + bp*logpop,
    a ~ dnorm(0,10),
    bp ~ dnorm(0,1),
    a_society[society] ~ dnorm(0,sigma_society),
    sigma_society ~ dcauchy(0,1)
  ),
  data=d ,
  iter=4000 , chains=3 )
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(mu)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 1, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 1, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 1, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 1, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 1, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 1, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 1, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 1, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 1, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 1, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 1, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.781 seconds (Warm-up)
## #                0.775 seconds (Sampling)
## #                1.556 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(mu)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 2, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 2, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 2, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 2, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 2, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 2, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 2, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 2, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 2, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 2, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 2, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.804 seconds (Warm-up)
## #                0.784 seconds (Sampling)
## #                1.588 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(mu)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 4000 [  0%]  (Warmup)
## Chain 3, Iteration:  400 / 4000 [ 10%]  (Warmup)
## Chain 3, Iteration:  800 / 4000 [ 20%]  (Warmup)
## Chain 3, Iteration: 1200 / 4000 [ 30%]  (Warmup)
## Chain 3, Iteration: 1600 / 4000 [ 40%]  (Warmup)
## Chain 3, Iteration: 2000 / 4000 [ 50%]  (Warmup)
## Chain 3, Iteration: 2001 / 4000 [ 50%]  (Sampling)
## Chain 3, Iteration: 2400 / 4000 [ 60%]  (Sampling)
## Chain 3, Iteration: 2800 / 4000 [ 70%]  (Sampling)
## Chain 3, Iteration: 3200 / 4000 [ 80%]  (Sampling)
## Chain 3, Iteration: 3600 / 4000 [ 90%]  (Sampling)
## Chain 3, Iteration: 4000 / 4000 [100%]  (Sampling)
## #  Elapsed Time: 0.764 seconds (Warm-up)
## #                0.773 seconds (Sampling)
## #                1.537 seconds (Total)
## 
## [1] "The following numerical problems occured the indicated number of times on chain 3"
##                                                                                 count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0!     1
## [1] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"
## [2] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(mu)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Warning: 使われていないコネクション 22 (<-kuma:11633) を閉じます
## Warning: 使われていないコネクション 21 (<-kuma:11633) を閉じます
## Warning: 使われていないコネクション 20 (<-kuma:11633) を閉じます
## Computing WAIC
## Constructing posterior predictions
## [ 600 / 6000 ]
[ 1200 / 6000 ]
[ 1800 / 6000 ]
[ 2400 / 6000 ]
[ 3000 / 6000 ]
[ 3600 / 6000 ]
[ 4200 / 6000 ]
[ 4800 / 6000 ]
[ 5400 / 6000 ]
[ 6000 / 6000 ]
## R code 12.40
post <- extract.samples(m12.6)
d.pred <- list(
  logpop = seq(from=6,to=14,length.out=30),
  society = rep(1,30)
)
a_society_sims <- rnorm(20000,0,post$sigma_society)
a_society_sims <- matrix(a_society_sims,2000,10)
link.m12.6 <- link( m12.6 , n=2000 , data=d.pred ,
                    replace=list(a_society=a_society_sims) )
## [ 200 / 2000 ]
[ 400 / 2000 ]
[ 600 / 2000 ]
[ 800 / 2000 ]
[ 1000 / 2000 ]
[ 1200 / 2000 ]
[ 1400 / 2000 ]
[ 1600 / 2000 ]
[ 1800 / 2000 ]
[ 2000 / 2000 ]
## R code 12.41
# plot raw data
plot( d$logpop , d$total_tools , col=rangi2 , pch=16 ,
      xlab="log population" , ylab="total tools" )

# plot posterior median
mu.median <- apply( link.m12.6 , 2 , median )
lines( d.pred$logpop , mu.median )

# plot 97%, 89%, and 67% intervals (all prime numbers)
mu.PI <- apply( link.m12.6 , 2 , PI , prob=0.97 )
shade( mu.PI , d.pred$logpop )
mu.PI <- apply( link.m12.6 , 2 , PI , prob=0.89 )
shade( mu.PI , d.pred$logpop )
mu.PI <- apply( link.m12.6 , 2 , PI , prob=0.67 )
shade( mu.PI , d.pred$logpop )

## R code 12.42
sort(unique(d$district))

## R code 12.43
d$district_id <- as.integer(as.factor(d$district))
sort(unique(d$district_id))