## R code 10.1
library(rethinking)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: ggplot2
## rstan (Version 2.8.0, packaged: 2015-09-19 14:48:38 UTC, GitRev: 05c3d0058b6a)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.57)
data(chimpanzees)
d <- chimpanzees
head(d)
##   actor recipient condition block trial prosoc_left chose_prosoc
## 1     1        NA         0     1     2           0            1
## 2     1        NA         0     1     4           0            0
## 3     1        NA         0     1     6           1            0
## 4     1        NA         0     1     8           0            1
## 5     1        NA         0     1    10           1            1
## 6     1        NA         0     1    12           1            1
##   pulled_left
## 1           0
## 2           1
## 3           0
## 4           0
## 5           1
## 6           1
summary(d)
##      actor     recipient     condition       block         trial      
##  Min.   :1   Min.   :2     Min.   :0.0   Min.   :1.0   Min.   : 1.00  
##  1st Qu.:2   1st Qu.:3     1st Qu.:0.0   1st Qu.:2.0   1st Qu.:18.00  
##  Median :4   Median :5     Median :0.5   Median :3.5   Median :36.00  
##  Mean   :4   Mean   :5     Mean   :0.5   Mean   :3.5   Mean   :36.38  
##  3rd Qu.:6   3rd Qu.:7     3rd Qu.:1.0   3rd Qu.:5.0   3rd Qu.:54.00  
##  Max.   :7   Max.   :8     Max.   :1.0   Max.   :6.0   Max.   :72.00  
##              NA's   :252                                              
##   prosoc_left   chose_prosoc     pulled_left    
##  Min.   :0.0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.5   Median :1.0000   Median :1.0000  
##  Mean   :0.5   Mean   :0.5675   Mean   :0.5794  
##  3rd Qu.:1.0   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0   Max.   :1.0000   Max.   :1.0000  
## 
dim(d)
## [1] 504   8
## R code 10.2
m10.1 <- map(
  alist(
    pulled_left ~ dbinom( 1 , p ) ,
    logit(p) <- a ,
    a ~ dnorm(0,10)
  ) ,
  data=d )
precis(m10.1)
##   Mean StdDev 5.5% 94.5%
## a 0.32   0.09 0.18  0.46
## R code 10.3
logistic( c(0.18,0.46) )
## [1] 0.5448789 0.6130142
## R code 10.4
m10.2 <- map(
  alist(
    pulled_left ~ dbinom( 1 , p ) ,
    logit(p) <- a + bp*prosoc_left ,
    a ~ dnorm(0,10) ,
    bp ~ dnorm(0,10)
  ) ,
  data=d )
m10.3 <- map(
  alist(
    pulled_left ~ dbinom( 1 , p ) ,
    logit(p) <- a + (bp + bpC*condition)*prosoc_left ,
    a ~ dnorm(0,10) ,
    bp ~ dnorm(0,10) ,
    bpC ~ dnorm(0,10)
  ) ,
  data=d )

## R code 10.5
compare( m10.1 , m10.2 , m10.3 )
##        WAIC pWAIC dWAIC weight   SE  dSE
## m10.2 680.5     2   0.0   0.70 9.26   NA
## m10.3 682.3     3   1.8   0.28 9.38 0.85
## m10.1 687.9     1   7.4   0.02 7.09 6.10
## R code 10.6
precis(m10.3)
##      Mean StdDev  5.5% 94.5%
## a    0.05   0.13 -0.15  0.25
## bp   0.61   0.23  0.25  0.97
## bpC -0.10   0.26 -0.53  0.32
## R code 10.7
exp(0.61)
## [1] 1.840431
## R code 10.8
logistic( 4 )
## [1] 0.9820138
## R code 10.9
logistic( 4 + 0.61 )
## [1] 0.9901462
## R code 10.10
# dummy data for predictions across treatments
d.pred <- data.frame(
  prosoc_left = c(0,1,0,1),   # right/left/right/left
  condition = c(0,0,1,1)      # control/control/partner/partner
)

# build prediction ensemble
chimp.ensemble <- ensemble( m10.1 , m10.2 ,
                      m10.3 , data=d.pred )
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
# summarize
pred.p <- apply( chimp.ensemble$link , 2 ,
                 mean )
pred.p.PI <- apply( chimp.ensemble$link , 2 ,
                    PI )

## R code 10.11
# empty plot frame with good axes
plot( 0 , 0 , type="n" , xlab="prosoc_left/condition" ,
      ylab="proportion pulled left" , ylim=c(0,1) , xaxt="n" ,
      xlim=c(1,4) )
axis( 1 , at=1:4 , labels=c("0/0","1/0","0/1","1/1") )

# plot raw data, one trend for each of 7 individual chimpanzees
# will use by() here; see Overthinking box for explanation
p <- by( d$pulled_left ,
         list(d$prosoc_left,d$condition,d$actor) , mean )
for ( chimp in 1:7 )
  lines( 1:4 , as.vector(p[,,chimp]) , col=rangi2 )

# now superimpose posterior predictions
lines( 1:4 , pred.p )
shade( pred.p.PI , 1:4 )

## R code 10.12
# clean NAs from the data
d2 <- d
d2$recipient <- NULL

# re-use map fit to get the formula
m10.3stan <- map2stan( m10.3 , data=d2 , iter=1e4 , warmup=1000 )
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1, Iteration: 1001 / 10000 [ 10%]  (Sampling)
## Chain 1, Iteration: 2000 / 10000 [ 20%]  (Sampling)
## Chain 1, Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 1, Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 1, Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%]  (Sampling)
## #  Elapsed Time: 1.953 seconds (Warm-up)
## #                15.129 seconds (Sampling)
## #                17.082 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 900 / 9000 ]
[ 1800 / 9000 ]
[ 2700 / 9000 ]
[ 3600 / 9000 ]
[ 4500 / 9000 ]
[ 5400 / 9000 ]
[ 6300 / 9000 ]
[ 7200 / 9000 ]
[ 8100 / 9000 ]
[ 9000 / 9000 ]
precis(m10.3stan)
##      Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a    0.05   0.12      -0.15       0.24  3198    1
## bp   0.61   0.22       0.26       0.96  3121    1
## bpC -0.11   0.27      -0.55       0.29  3107    1
## R code 10.13
pairs(m10.3stan)

## R code 10.14
m10.4 <- map2stan(
  alist(
    pulled_left ~ dbinom( 1 , p ) ,
    logit(p) <- a[actor] + (bp + bpC*condition)*prosoc_left ,
    a[actor] ~ dnorm(0,10),
    bp ~ dnorm(0,10),
    bpC ~ dnorm(0,10)
  ) ,
  data=d2 , chains=2 , iter=2500 , warmup=500 )
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2500 [  0%]  (Warmup)
## Chain 1, Iteration:  250 / 2500 [ 10%]  (Warmup)
## Chain 1, Iteration:  500 / 2500 [ 20%]  (Warmup)
## Chain 1, Iteration:  501 / 2500 [ 20%]  (Sampling)
## Chain 1, Iteration:  750 / 2500 [ 30%]  (Sampling)
## Chain 1, Iteration: 1000 / 2500 [ 40%]  (Sampling)
## Chain 1, Iteration: 1250 / 2500 [ 50%]  (Sampling)
## Chain 1, Iteration: 1500 / 2500 [ 60%]  (Sampling)
## Chain 1, Iteration: 1750 / 2500 [ 70%]  (Sampling)
## Chain 1, Iteration: 2000 / 2500 [ 80%]  (Sampling)
## Chain 1, Iteration: 2250 / 2500 [ 90%]  (Sampling)
## Chain 1, Iteration: 2500 / 2500 [100%]  (Sampling)
## #  Elapsed Time: 1.586 seconds (Warm-up)
## #                4.563 seconds (Sampling)
## #                6.149 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2500 [  0%]  (Warmup)
## Chain 2, Iteration:  250 / 2500 [ 10%]  (Warmup)
## Chain 2, Iteration:  500 / 2500 [ 20%]  (Warmup)
## Chain 2, Iteration:  501 / 2500 [ 20%]  (Sampling)
## Chain 2, Iteration:  750 / 2500 [ 30%]  (Sampling)
## Chain 2, Iteration: 1000 / 2500 [ 40%]  (Sampling)
## Chain 2, Iteration: 1250 / 2500 [ 50%]  (Sampling)
## Chain 2, Iteration: 1500 / 2500 [ 60%]  (Sampling)
## Chain 2, Iteration: 1750 / 2500 [ 70%]  (Sampling)
## Chain 2, Iteration: 2000 / 2500 [ 80%]  (Sampling)
## Chain 2, Iteration: 2250 / 2500 [ 90%]  (Sampling)
## Chain 2, Iteration: 2500 / 2500 [100%]  (Sampling)
## #  Elapsed Time: 1.71 seconds (Warm-up)
## #                5.756 seconds (Sampling)
## #                7.466 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
## Warning in map2stan(alist(pulled_left ~ dbinom(1, p), logit(p) <- a[actor] + : There were 10 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
## R code 10.15
unique( d$actor )
## [1] 1 2 3 4 5 6 7
## R code 10.16
precis( m10.4 , depth=2 )
## Warning in precis(m10.4, depth = 2): There were 10 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
##       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1] -0.75   0.28      -1.17      -0.29  2379    1
## a[2] 10.89   5.42       3.58      18.65  1057    1
## a[3] -1.05   0.29      -1.53      -0.62  2074    1
## a[4] -1.05   0.29      -1.49      -0.58  1583    1
## a[5] -0.74   0.27      -1.17      -0.32  2140    1
## a[6]  0.22   0.27      -0.20       0.64  2601    1
## a[7]  1.81   0.39       1.21       2.46  2603    1
## bp    0.84   0.27       0.40       1.28  1360    1
## bpC  -0.14   0.31      -0.68       0.33  1775    1
## R code 10.17
post <- extract.samples( m10.4 )
str( post )
## List of 3
##  $ a  : num [1:4000, 1:7] -0.462 -0.66 -0.6 -0.423 -0.786 ...
##  $ bp : num [1:4000(1d)] 0.335 1.012 1.159 1.046 0.352 ...
##  $ bpC: num [1:4000(1d)] 0.398 -0.48 -0.483 -1.006 0.052 ...
## R code 10.18
dens( post$a[,2] )

## R code 10.19
chimp <- 1
d.pred <- list(
  pulled_left = rep( 0 , 4 ), # empty outcome
  prosoc_left = c(0,1,0,1),   # right/left/right/left
  condition = c(0,0,1,1),     # control/control/partner/partner
  actor = rep(chimp,4)
)
link.m10.4 <- link( m10.4 , data=d.pred )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
pred.p <- apply( link.m10.4 , 2 , mean )
pred.p.PI <- apply( link.m10.4 , 2 , PI )

plot( 0 , 0 , type="n" , xlab="prosoc_left/condition" ,
      ylab="proportion pulled left" , ylim=c(0,1) , xaxt="n" ,
      xlim=c(1,4) , yaxp=c(0,1,2) )
axis( 1 , at=1:4 , labels=c("0/0","1/0","0/1","1/1") )
mtext( paste( "actor" , chimp ) )

p <- by( d$pulled_left ,
         list(d$prosoc_left,d$condition,d$actor) , mean )
lines( 1:4 , as.vector(p[,,chimp]) , col=rangi2 )

lines( 1:4 , pred.p )
shade( pred.p.PI , 1:4 )

## R code 10.20
data(chimpanzees)
d <- chimpanzees
d.aggregated <- aggregate( d$pulled_left ,
                           list(prosoc_left=d$prosoc_left,condition=d$condition,actor=d$actor) ,
                           sum )
head(d.aggregated)
##   prosoc_left condition actor  x
## 1           0         0     1  6
## 2           1         0     1  9
## 3           0         1     1  5
## 4           1         1     1 10
## 5           0         0     2 18
## 6           1         0     2 18
## R code 10.21
m10.5 <- map(
  alist(
    x ~ dbinom( 18 , p ) ,
    logit(p) <- a + (bp + bpC*condition)*prosoc_left ,
    a ~ dnorm(0,10) ,
    bp ~ dnorm(0,10) ,
    bpC ~ dnorm(0,10)
  ) ,
  data=d.aggregated )
## R code 10.22
library(rethinking)
data(UCBadmit)
d <- UCBadmit
head(d)
##   dept applicant.gender admit reject applications
## 1    A             male   512    313          825
## 2    A           female    89     19          108
## 3    B             male   353    207          560
## 4    B           female    17      8           25
## 5    C             male   120    205          325
## 6    C           female   202    391          593
## R code 10.23
d$male <- ifelse( d$applicant.gender=="male" , 1 , 0 )
m10.6 <- map(
  alist(
    admit ~ dbinom( applications , p ) ,
    logit(p) <- a + bm*male ,
    a ~ dnorm(0,10) ,
    bm ~ dnorm(0,10)
  ) ,
  data=d )
m10.7 <- map(
  alist(
    admit ~ dbinom( applications , p ) ,
    logit(p) <- a ,
    a ~ dnorm(0,10)
  ) ,
  data=d )

## R code 10.24
compare( m10.6 , m10.7 )
##         WAIC pWAIC dWAIC weight    SE   dSE
## m10.6 5954.8   2.0   0.0      1 34.92    NA
## m10.7 6046.5   1.1  91.7      0 29.88 19.16
## R code 10.25
precis(m10.6)
##     Mean StdDev  5.5% 94.5%
## a  -0.83   0.05 -0.91 -0.75
## bm  0.61   0.06  0.51  0.71
## R code 10.26
post <- extract.samples( m10.6 )
p.admit.male <- logistic( post$a + post$bm )
p.admit.female <- logistic( post$a )
diff.admit <- p.admit.male - p.admit.female
quantile( diff.admit , c(0.025,0.5,0.975) )
##      2.5%       50%     97.5% 
## 0.1134991 0.1417073 0.1693617
## R code 10.27
postcheck( m10.6 , n=1e4 )
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
# draw lines connecting points from same dept
for ( i in 1:6 ) {
  x <- 1 + 2*(i-1)
  y1 <- d$admit[x]/d$applications[x]
  y2 <- d$admit[x+1]/d$applications[x+1]
  lines( c(x,x+1) , c(y1,y2) , col=rangi2 , lwd=2 )
  text( x+0.5 , (y1+y2)/2 + 0.05 , d$dept[x] , cex=0.8 , col=rangi2 )
}

## R code 10.28
# make index
d$dept_id <- coerce_index( d$dept )
head(d)
##   dept applicant.gender admit reject applications male dept_id
## 1    A             male   512    313          825    1       1
## 2    A           female    89     19          108    0       1
## 3    B             male   353    207          560    1       2
## 4    B           female    17      8           25    0       2
## 5    C             male   120    205          325    1       3
## 6    C           female   202    391          593    0       3
# model with unique intercept for each dept
m10.8 <- map(
  alist(
    admit ~ dbinom( applications , p ) ,
    logit(p) <- a[dept_id] ,
    a[dept_id] ~ dnorm(0,10)
  ) , data=d )

# model with male difference as well
m10.9 <- map(
  alist(
    admit ~ dbinom( applications , p ) ,
    logit(p) <- a[dept_id] + bm*male ,
    a[dept_id] ~ dnorm(0,10) ,
    bm ~ dnorm(0,10)
  ) , data=d )

## R code 10.29
compare( m10.6 , m10.7 , m10.8 , m10.9 )
##         WAIC pWAIC dWAIC weight    SE   dSE
## m10.8 5201.1   6.0   0.0   0.53 57.00    NA
## m10.9 5201.3   6.9   0.3   0.47 57.07  2.48
## m10.6 5954.9   2.0 753.8   0.00 35.05 48.56
## m10.7 6046.3   1.0 845.2   0.00 30.06 52.40
## R code 10.30
precis( m10.9 , depth=2 )
##       Mean StdDev  5.5% 94.5%
## a[1]  0.68   0.10  0.52  0.84
## a[2]  0.64   0.12  0.45  0.82
## a[3] -0.58   0.07 -0.70 -0.46
## a[4] -0.61   0.09 -0.75 -0.48
## a[5] -1.06   0.10 -1.22 -0.90
## a[6] -2.62   0.16 -2.88 -2.37
## bm   -0.10   0.08 -0.23  0.03
## R code 10.31
m10.9stan <- map2stan( m10.9 , chains=2 , 
                       iter=2500 , warmup=500 )
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
## 
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2500 [  0%]  (Warmup)
## Chain 1, Iteration:  250 / 2500 [ 10%]  (Warmup)
## Chain 1, Iteration:  500 / 2500 [ 20%]  (Warmup)
## Chain 1, Iteration:  501 / 2500 [ 20%]  (Sampling)
## Chain 1, Iteration:  750 / 2500 [ 30%]  (Sampling)
## Chain 1, Iteration: 1000 / 2500 [ 40%]  (Sampling)
## Chain 1, Iteration: 1250 / 2500 [ 50%]  (Sampling)
## Chain 1, Iteration: 1500 / 2500 [ 60%]  (Sampling)
## Chain 1, Iteration: 1750 / 2500 [ 70%]  (Sampling)
## Chain 1, Iteration: 2000 / 2500 [ 80%]  (Sampling)
## Chain 1, Iteration: 2250 / 2500 [ 90%]  (Sampling)
## Chain 1, Iteration: 2500 / 2500 [100%]  (Sampling)
## #  Elapsed Time: 0.048 seconds (Warm-up)
## #                0.172 seconds (Sampling)
## #                0.22 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 2500 [  0%]  (Warmup)
## Chain 2, Iteration:  250 / 2500 [ 10%]  (Warmup)
## Chain 2, Iteration:  500 / 2500 [ 20%]  (Warmup)
## Chain 2, Iteration:  501 / 2500 [ 20%]  (Sampling)
## Chain 2, Iteration:  750 / 2500 [ 30%]  (Sampling)
## Chain 2, Iteration: 1000 / 2500 [ 40%]  (Sampling)
## Chain 2, Iteration: 1250 / 2500 [ 50%]  (Sampling)
## Chain 2, Iteration: 1500 / 2500 [ 60%]  (Sampling)
## Chain 2, Iteration: 1750 / 2500 [ 70%]  (Sampling)
## Chain 2, Iteration: 2000 / 2500 [ 80%]  (Sampling)
## Chain 2, Iteration: 2250 / 2500 [ 90%]  (Sampling)
## Chain 2, Iteration: 2500 / 2500 [100%]  (Sampling)
## #  Elapsed Time: 0.046 seconds (Warm-up)
## #                0.157 seconds (Sampling)
## #                0.203 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
## 
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
precis(m10.9stan,depth=2)
##       Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1]  0.68   0.10       0.52       0.84  1365    1
## a[2]  0.64   0.12       0.43       0.81  1355    1
## a[3] -0.58   0.07      -0.70      -0.46  2535    1
## a[4] -0.62   0.09      -0.75      -0.47  1870    1
## a[5] -1.06   0.10      -1.22      -0.90  2346    1
## a[6] -2.64   0.16      -2.88      -2.36  2388    1
## bm   -0.10   0.08      -0.24       0.03  1038    1
## R code 10.32
m10.7glm <- glm( cbind(admit,reject) ~ 1 , data=d , family=binomial )
m10.6glm <- glm( cbind(admit,reject) ~ male , data=d , family=binomial )
m10.8glm <- glm( cbind(admit,reject) ~ dept , data=d , family=binomial )
m10.9glm <- glm( cbind(admit,reject) ~ male + dept , data=d ,
                 family=binomial )
## R code 10.33
data(chimpanzees)
m10.4glm <- glm(
  pulled_left ~ as.factor(actor) + prosoc_left * condition - condition ,
  data=chimpanzees , family=binomial )
## R code 10.34
glimmer( pulled_left ~ prosoc_left * condition - condition ,
         data=chimpanzees , family=binomial )
## alist(
##     pulled_left ~ dbinom( 1 , p ),
##     logit(p) <- Intercept +
##         b_prosoc_left*prosoc_left +
##         b_prosoc_left_X_condition*prosoc_left_X_condition,
##     Intercept ~ dnorm(0,10),
##     b_prosoc_left ~ dnorm(0,10),
##     b_prosoc_left_X_condition ~ dnorm(0,10)
## )
## R code 10.35
# outcome and predictor almost perfectly associated
y <- c( rep(0,10) , rep(1,10) )
x <- c( rep(-1,9) , rep(1,11) )
# fit binomial GLM
m.bad <- glm( y ~ x , data=list(y=y,x=x) ,
              family=binomial )
precis(m.bad)
##              Mean  StdDev     5.5%   94.5%
## (Intercept) -9.13 2955.06 -4731.89 4713.63
## x           11.43 2955.06 -4711.33 4734.19
## R code 10.36
m.good <- map(
  alist(
    y ~ dbinom( 1 , p ),
    logit(p) <- a + b*x,
    c(a,b) ~ dnorm(0,10)
  ) , data=list(y=y,x=x) )
precis(m.good)
##    Mean StdDev  5.5% 94.5%
## a -1.73   2.78 -6.16  2.71
## b  4.02   2.78 -0.42  8.45
## R code 10.37
m.good.stan <- map2stan( m.good )
## 
## SAMPLING FOR MODEL 'y ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
## #  Elapsed Time: 0.095 seconds (Warm-up)
## #                0.093 seconds (Sampling)
## #                0.188 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'y ~ dbinom(1, p)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
pairs(m.good.stan)

## R code 10.38
y <- rbinom(1e5,1000,1/1000)
c( mean(y) , var(y) )
## [1] 1.002720 1.001003
## R code 10.39
library(rethinking)
data(Kline)
d <- Kline
#head(d)

## R code 10.40
d$log_pop <- log(d$population)
d$contact_high <- ifelse( d$contact=="high" , 1 , 0 )
head(d)
##      culture population contact total_tools mean_TU  log_pop contact_high
## 1   Malekula       1100     low          13     3.2 7.003065            0
## 2    Tikopia       1500     low          22     4.7 7.313220            0
## 3 Santa Cruz       3600     low          24     4.0 8.188689            0
## 4        Yap       4791    high          43     5.0 8.474494            1
## 5   Lau Fiji       7400    high          33     5.0 8.909235            1
## 6  Trobriand       8000    high          19     4.0 8.987197            1
## R code 10.41
m10.10 <- map(
  alist(
    total_tools ~ dpois( lambda ),
    log(lambda) <- a + bp*log_pop +
      bc*contact_high + bpc*contact_high*log_pop,
    a ~ dnorm(0,100),
    c(bp,bc,bpc) ~ dnorm(0,1)
  ),
  data=d )

## R code 10.42
precis(m10.10,corr=TRUE)
##      Mean StdDev  5.5% 94.5%     a    bp    bc   bpc
## a    0.94   0.36  0.37  1.52  1.00 -0.98 -0.13  0.07
## bp   0.26   0.03  0.21  0.32 -0.98  1.00  0.12 -0.08
## bc  -0.09   0.84 -1.43  1.25 -0.13  0.12  1.00 -0.99
## bpc  0.04   0.09 -0.10  0.19  0.07 -0.08 -0.99  1.00
#plot(precis(m10.10))
## R code 10.43
post <- extract.samples(m10.10)
lambda_high <- exp( post$a + post$bc + (post$bp + post$bpc)*8 )
lambda_low <- exp( post$a + post$bp*8 )

## R code 10.44
diff <- lambda_high - lambda_low
sum(diff > 0)/length(diff)
## [1] 0.9541
## R code 10.45
# no interaction
m10.11 <- map(
  alist(
    total_tools ~ dpois( lambda ),
    log(lambda) <- a + bp*log_pop + bc*contact_high,
    a ~ dnorm(0,100),
    c(bp,bc) ~ dnorm( 0 , 1 )
  ), data=d )

## R code 10.46
# no contact rate
m10.12 <- map(
  alist(
    total_tools ~ dpois( lambda ),
    log(lambda) <- a + bp*log_pop,
    a ~ dnorm(0,100),
    bp ~ dnorm( 0 , 1 )
  ), data=d )

# no log-population
m10.13 <- map(
  alist(
    total_tools ~ dpois( lambda ),
    log(lambda) <- a + bc*contact_high,
    a ~ dnorm(0,100),
    bc ~ dnorm( 0 , 1 )
  ), data=d )

## R code 10.47
# intercept only
m10.14 <- map(
  alist(
    total_tools ~ dpois( lambda ),
    log(lambda) <- a,
    a ~ dnorm(0,100)
  ), data=d )

# compare all using WAIC
# adding n=1e4 for more stable WAIC estimates
# will also plot the comparison
( islands.compare <- compare(m10.10,m10.11,m10.12,m10.13,m10.14,n=1e4) )
##         WAIC pWAIC dWAIC weight    SE   dSE
## m10.11  79.1   4.2   0.0   0.62 11.14    NA
## m10.10  80.3   4.9   1.2   0.34 11.40  1.12
## m10.12  84.4   3.8   5.4   0.04  8.83  8.26
## m10.14 141.7   8.4  62.6   0.00 31.67 34.57
## m10.13 150.8  17.1  71.7   0.00 44.61 46.71
#plot(islands.compare)
## R code 10.48
# make plot of raw data to begin
# point character (pch) indicates contact rate
pch <- ifelse( d$contact_high==1 , 16 , 1 )
plot( d$log_pop , d$total_tools , col=rangi2 , pch=pch ,
      xlab="log-population" , ylab="total tools" )


# sequence of log-population sizes to compute over
log_pop.seq <- seq( from=6 , to=13 , length.out=30 )

# compute trend for high contact islands
d.pred <- data.frame(
  log_pop = log_pop.seq,
  contact_high = 1
)
lambda.pred.h <- ensemble( m10.10 , m10.11 , m10.12 , data=d.pred )
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
lambda.med <- apply( lambda.pred.h$link , 2 , median )
lambda.PI <- apply( lambda.pred.h$link , 2 , PI )

# plot predicted trend for high contact islands
lines( log_pop.seq , lambda.med , col=rangi2 )
shade( lambda.PI , log_pop.seq , col=col.alpha(rangi2,0.2) )

# compute trend for low contact islands
d.pred <- data.frame(
  log_pop = log_pop.seq,
  contact_high = 0
)
}
lambda.pred.l <- ensemble( m10.10 , m10.11 , m10.12 , data=d.pred )
lambda.med <- apply( lambda.pred.l$link , 2 , median )
lambda.PI <- apply( lambda.pred.l$link , 2 , PI )

# plot again
lines( log_pop.seq , lambda.med , lty=2 )
shade( lambda.PI , log_pop.seq , col=col.alpha("black",0.1) )
## R code 10.49
m10.10stan <- map2stan( m10.10 , iter=3000 , warmup=1000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 1, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 1, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 1, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 1, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 1, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 1, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 1, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 1, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 1, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 1, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 1, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 0.267 seconds (Warm-up)
## #                0.629 seconds (Sampling)
## #                0.896 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 2, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 2, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 2, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 2, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 2, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 2, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 2, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 2, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 2, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 2, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 2, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 2.121 seconds (Warm-up)
## #                0.629 seconds (Sampling)
## #                2.75 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 3, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 3, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 3, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 3, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 3, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 3, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 3, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 3, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 3, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 3, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 3, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 2.096 seconds (Warm-up)
## #                0.501 seconds (Sampling)
## #                2.597 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 4, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 4, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 4, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 4, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 4, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 4, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 4, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 4, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 4, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 4, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 4, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 0.814 seconds (Warm-up)
## #                0.642 seconds (Sampling)
## #                1.456 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 800 / 8000 ]
[ 1600 / 8000 ]
[ 2400 / 8000 ]
[ 3200 / 8000 ]
[ 4000 / 8000 ]
[ 4800 / 8000 ]
[ 5600 / 8000 ]
[ 6400 / 8000 ]
[ 7200 / 8000 ]
[ 8000 / 8000 ]
precis(m10.10stan)
##      Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a    0.94   0.36       0.38       1.51  1985    1
## bp   0.26   0.03       0.21       0.32  1976    1
## bc  -0.10   0.86      -1.48       1.27  1998    1
## bpc  0.04   0.09      -0.11       0.20  2022    1
## R code 10.50
# construct centered predictor
d$log_pop_c <- d$log_pop - mean(d$log_pop)

# re-estimate
m10.10stan.c <- map2stan(
  alist(
    total_tools ~ dpois( lambda ) ,
    log(lambda) <- a + bp*log_pop_c + bc*contact_high +
      bcp*log_pop_c*contact_high ,
    a ~ dnorm(0,10) ,
    bp ~ dnorm(0,1) ,
    bc ~ dnorm(0,1) ,
    bcp ~ dnorm(0,1)
  ) ,
  data=d , iter=3000 , warmup=1000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
## 
## Chain 1, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 1, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 1, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 1, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 1, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 1, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 1, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 1, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 1, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 1, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 1, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 1, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 0.063 seconds (Warm-up)
## #                0.093 seconds (Sampling)
## #                0.156 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 2).
## 
## Chain 2, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 2, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 2, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 2, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 2, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 2, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 2, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 2, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 2, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 2, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 2, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 2, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 0.063 seconds (Warm-up)
## #                0.125 seconds (Sampling)
## #                0.188 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 3).
## 
## Chain 3, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 3, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 3, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 3, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 3, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 3, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 3, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 3, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 3, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 3, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 3, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 3, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 0.062 seconds (Warm-up)
## #                0.11 seconds (Sampling)
## #                0.172 seconds (Total)
## 
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 4).
## 
## Chain 4, Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 4, Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 4, Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 4, Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 4, Iteration: 1001 / 3000 [ 33%]  (Sampling)
## Chain 4, Iteration: 1300 / 3000 [ 43%]  (Sampling)
## Chain 4, Iteration: 1600 / 3000 [ 53%]  (Sampling)
## Chain 4, Iteration: 1900 / 3000 [ 63%]  (Sampling)
## Chain 4, Iteration: 2200 / 3000 [ 73%]  (Sampling)
## Chain 4, Iteration: 2500 / 3000 [ 83%]  (Sampling)
## Chain 4, Iteration: 2800 / 3000 [ 93%]  (Sampling)
## Chain 4, Iteration: 3000 / 3000 [100%]  (Sampling)
## #  Elapsed Time: 0.063 seconds (Warm-up)
## #                0.093 seconds (Sampling)
## #                0.156 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
## 
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 800 / 8000 ]
[ 1600 / 8000 ]
[ 2400 / 8000 ]
[ 3200 / 8000 ]
[ 4000 / 8000 ]
[ 4800 / 8000 ]
[ 5600 / 8000 ]
[ 6400 / 8000 ]
[ 7200 / 8000 ]
[ 8000 / 8000 ]
precis(m10.10stan.c)
##     Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a   3.31   0.09       3.17       3.46  2715    1
## bp  0.26   0.04       0.21       0.32  3064    1
## bc  0.28   0.12       0.09       0.46  2641    1
## bcp 0.07   0.17      -0.19       0.35  3646    1
## R code 10.51
num_days <- 30
y <- rpois( num_days , 1.5 )

## R code 10.52
num_weeks <- 4
y_new <- rpois( num_weeks , 0.5*7 )

## R code 10.53
y_all <- c( y , y_new )
exposure <- c( rep(1,30) , rep(7,4) )
monastery <- c( rep(0,30) , rep(1,4) )
d <- data.frame( y=y_all , days=exposure , monastery=monastery )

## R code 10.54
# compute the offset
d$log_days <- log( d$days )

# fit the model
m10.15 <- map(
  alist(
    y ~ dpois( lambda ),
    log(lambda) <- log_days + a + b*monastery,
    a ~ dnorm(0,100),
    b ~ dnorm(0,1)
  ),
  data=d )
## R code 10.55
post <- extract.samples( m10.15 )
lambda_old <- exp( post$a )
lambda_new <- exp( post$a + post$b )
precis( data.frame( lambda_old , lambda_new ) )
##            Mean StdDev |0.89 0.89|
## lambda_old 1.52   0.22  1.16  1.86
## lambda_new 0.59   0.15  0.35  0.79
## R code 10.56
# simulate career choices among 500 individuals
N <- 500             # number of individuals
income <- 1:3        # expected income of each career
score <- 0.5*income  # scores for each career, based on income
# next line converts scores to probabilities
p <- softmax(score[1],score[2],score[3])

# now simulate choice
# outcome career holds event type values, not counts
career <- rep(NA,N)  # empty vector of choices for each individual
# sample chosen career for each individual
for ( i in 1:N ) career[i] <- sample( 1:3 , size=1 , prob=p )

## R code 10.57
# fit the model, using dcategorical and softmax link
m10.16 <- map(
  alist(
    career ~ dcategorical( softmax(0,s2,s3) ),
    s2 <- b*2,    # linear model for event type 2
    s3 <- b*3,    # linear model for event type 3
    b ~ dnorm(0,5)
  ) ,
  data=list(career=career) )
## R code 10.58
N <- 100
# simulate family incomes for each individual
family_income <- runif(N)
# assign a unique coefficient for each type of event
b <- (1:-1)
career <- rep(NA,N)  # empty vector of choices for each individual
for ( i in 1:N ) {
  score <- 0.5*(1:3) + b*family_income[i]
  p <- softmax(score[1],score[2],score[3])
  career[i] <- sample( 1:3 , size=1 , prob=p )
}

m10.17 <- map(
  alist(
    career ~ dcategorical( softmax(0,s2,s3) ),
    s2 <- a2 + b2*family_income,
    s3 <- a3 + b3*family_income,
    c(a2,a3,b2,b3) ~ dnorm(0,5)
  ) ,
  data=list(career=career,family_income=family_income) )
## R code 10.59
library(rethinking)
data(UCBadmit)
d <- UCBadmit
head(d)
##   dept applicant.gender admit reject applications
## 1    A             male   512    313          825
## 2    A           female    89     19          108
## 3    B             male   353    207          560
## 4    B           female    17      8           25
## 5    C             male   120    205          325
## 6    C           female   202    391          593
## R code 10.60
# binomial model of overall admission probability
m_binom <- map(
  alist(
    admit ~ dbinom(applications,p),
    logit(p) <- a,
    a ~ dnorm(0,100)
  ),
  data=d )

# Poisson model of overall admission rate and rejection rate
d$rej <- d$reject # 'reject' is a reserved word
m_pois <- map2stan(
  alist(
    admit ~ dpois(lambda1),
    rej ~ dpois(lambda2),
    log(lambda1) <- a1,
    log(lambda2) <- a2,
    c(a1,a2) ~ dnorm(0,100)
  ),
  data=d , chains=3 , cores=3 )
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## Precompiling model...
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
## the number of chains is less than 1; sampling not done
## Dispatching 3 chains to 3 cores.
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
## 
## SAMPLING FOR MODEL 'admit ~ dpois(lambda1)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 300 / 3000 ]
[ 600 / 3000 ]
[ 900 / 3000 ]
[ 1200 / 3000 ]
[ 1500 / 3000 ]
[ 1800 / 3000 ]
[ 2100 / 3000 ]
[ 2400 / 3000 ]
[ 2700 / 3000 ]
[ 3000 / 3000 ]
## Warning in map2stan(alist(admit ~ dpois(lambda1), rej ~ dpois(lambda2), : There were 243 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
## R code 10.61
logistic(coef(m_binom))
## a 
## 1
## R code 10.62
k <- as.numeric(coef(m_pois))
exp(k[1])/(exp(k[1])+exp(k[2]))
## [1] 3.725674e-31
## R code 10.63
# simulate
N <- 100
x <- runif(N)
y <- rgeom( N , prob=logistic( -1 + 2*x ) )

# estimate
m10.18 <- map(
  alist(
    y ~ dgeom( p ),
    logit(p) <- a + b*x,
    a ~ dnorm(0,10),
    b ~ dnorm(0,1)
  ),
  data=list(y=y,x=x) )
precis(m10.18)
##    Mean StdDev  5.5% 94.5%
## a -0.71   0.24 -1.09 -0.33
## b  1.44   0.46  0.71  2.17