## R code 10.1
library(rethinking)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: ggplot2
## rstan (Version 2.8.0, packaged: 2015-09-19 14:48:38 UTC, GitRev: 05c3d0058b6a)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.57)
data(chimpanzees)
d <- chimpanzees
head(d)
## actor recipient condition block trial prosoc_left chose_prosoc
## 1 1 NA 0 1 2 0 1
## 2 1 NA 0 1 4 0 0
## 3 1 NA 0 1 6 1 0
## 4 1 NA 0 1 8 0 1
## 5 1 NA 0 1 10 1 1
## 6 1 NA 0 1 12 1 1
## pulled_left
## 1 0
## 2 1
## 3 0
## 4 0
## 5 1
## 6 1
summary(d)
## actor recipient condition block trial
## Min. :1 Min. :2 Min. :0.0 Min. :1.0 Min. : 1.00
## 1st Qu.:2 1st Qu.:3 1st Qu.:0.0 1st Qu.:2.0 1st Qu.:18.00
## Median :4 Median :5 Median :0.5 Median :3.5 Median :36.00
## Mean :4 Mean :5 Mean :0.5 Mean :3.5 Mean :36.38
## 3rd Qu.:6 3rd Qu.:7 3rd Qu.:1.0 3rd Qu.:5.0 3rd Qu.:54.00
## Max. :7 Max. :8 Max. :1.0 Max. :6.0 Max. :72.00
## NA's :252
## prosoc_left chose_prosoc pulled_left
## Min. :0.0 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5 Median :1.0000 Median :1.0000
## Mean :0.5 Mean :0.5675 Mean :0.5794
## 3rd Qu.:1.0 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0 Max. :1.0000 Max. :1.0000
##
dim(d)
## [1] 504 8
## R code 10.2
m10.1 <- map(
alist(
pulled_left ~ dbinom( 1 , p ) ,
logit(p) <- a ,
a ~ dnorm(0,10)
) ,
data=d )
precis(m10.1)
## Mean StdDev 5.5% 94.5%
## a 0.32 0.09 0.18 0.46
## R code 10.3
logistic( c(0.18,0.46) )
## [1] 0.5448789 0.6130142
## R code 10.4
m10.2 <- map(
alist(
pulled_left ~ dbinom( 1 , p ) ,
logit(p) <- a + bp*prosoc_left ,
a ~ dnorm(0,10) ,
bp ~ dnorm(0,10)
) ,
data=d )
m10.3 <- map(
alist(
pulled_left ~ dbinom( 1 , p ) ,
logit(p) <- a + (bp + bpC*condition)*prosoc_left ,
a ~ dnorm(0,10) ,
bp ~ dnorm(0,10) ,
bpC ~ dnorm(0,10)
) ,
data=d )
## R code 10.5
compare( m10.1 , m10.2 , m10.3 )
## WAIC pWAIC dWAIC weight SE dSE
## m10.2 680.5 2 0.0 0.70 9.26 NA
## m10.3 682.3 3 1.8 0.28 9.38 0.85
## m10.1 687.9 1 7.4 0.02 7.09 6.10
## R code 10.6
precis(m10.3)
## Mean StdDev 5.5% 94.5%
## a 0.05 0.13 -0.15 0.25
## bp 0.61 0.23 0.25 0.97
## bpC -0.10 0.26 -0.53 0.32
## R code 10.7
exp(0.61)
## [1] 1.840431
## R code 10.8
logistic( 4 )
## [1] 0.9820138
## R code 10.9
logistic( 4 + 0.61 )
## [1] 0.9901462
## R code 10.10
# dummy data for predictions across treatments
d.pred <- data.frame(
prosoc_left = c(0,1,0,1), # right/left/right/left
condition = c(0,0,1,1) # control/control/partner/partner
)
# build prediction ensemble
chimp.ensemble <- ensemble( m10.1 , m10.2 ,
m10.3 , data=d.pred )
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
# summarize
pred.p <- apply( chimp.ensemble$link , 2 ,
mean )
pred.p.PI <- apply( chimp.ensemble$link , 2 ,
PI )
## R code 10.11
# empty plot frame with good axes
plot( 0 , 0 , type="n" , xlab="prosoc_left/condition" ,
ylab="proportion pulled left" , ylim=c(0,1) , xaxt="n" ,
xlim=c(1,4) )
axis( 1 , at=1:4 , labels=c("0/0","1/0","0/1","1/1") )
# plot raw data, one trend for each of 7 individual chimpanzees
# will use by() here; see Overthinking box for explanation
p <- by( d$pulled_left ,
list(d$prosoc_left,d$condition,d$actor) , mean )
for ( chimp in 1:7 )
lines( 1:4 , as.vector(p[,,chimp]) , col=rangi2 )
# now superimpose posterior predictions
lines( 1:4 , pred.p )
shade( pred.p.PI , 1:4 )

## R code 10.12
# clean NAs from the data
d2 <- d
d2$recipient <- NULL
# re-use map fit to get the formula
m10.3stan <- map2stan( m10.3 , data=d2 , iter=1e4 , warmup=1000 )
##
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1, Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1, Iteration: 1001 / 10000 [ 10%] (Sampling)
## Chain 1, Iteration: 2000 / 10000 [ 20%] (Sampling)
## Chain 1, Iteration: 3000 / 10000 [ 30%] (Sampling)
## Chain 1, Iteration: 4000 / 10000 [ 40%] (Sampling)
## Chain 1, Iteration: 5000 / 10000 [ 50%] (Sampling)
## Chain 1, Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1, Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1, Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1, Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1, Iteration: 10000 / 10000 [100%] (Sampling)
## # Elapsed Time: 1.953 seconds (Warm-up)
## # 15.129 seconds (Sampling)
## # 17.082 seconds (Total)
##
##
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 900 / 9000 ]
[ 1800 / 9000 ]
[ 2700 / 9000 ]
[ 3600 / 9000 ]
[ 4500 / 9000 ]
[ 5400 / 9000 ]
[ 6300 / 9000 ]
[ 7200 / 9000 ]
[ 8100 / 9000 ]
[ 9000 / 9000 ]
precis(m10.3stan)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 0.05 0.12 -0.15 0.24 3198 1
## bp 0.61 0.22 0.26 0.96 3121 1
## bpC -0.11 0.27 -0.55 0.29 3107 1
## R code 10.13
pairs(m10.3stan)

## R code 10.14
m10.4 <- map2stan(
alist(
pulled_left ~ dbinom( 1 , p ) ,
logit(p) <- a[actor] + (bp + bpC*condition)*prosoc_left ,
a[actor] ~ dnorm(0,10),
bp ~ dnorm(0,10),
bpC ~ dnorm(0,10)
) ,
data=d2 , chains=2 , iter=2500 , warmup=500 )
##
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 2500 [ 0%] (Warmup)
## Chain 1, Iteration: 250 / 2500 [ 10%] (Warmup)
## Chain 1, Iteration: 500 / 2500 [ 20%] (Warmup)
## Chain 1, Iteration: 501 / 2500 [ 20%] (Sampling)
## Chain 1, Iteration: 750 / 2500 [ 30%] (Sampling)
## Chain 1, Iteration: 1000 / 2500 [ 40%] (Sampling)
## Chain 1, Iteration: 1250 / 2500 [ 50%] (Sampling)
## Chain 1, Iteration: 1500 / 2500 [ 60%] (Sampling)
## Chain 1, Iteration: 1750 / 2500 [ 70%] (Sampling)
## Chain 1, Iteration: 2000 / 2500 [ 80%] (Sampling)
## Chain 1, Iteration: 2250 / 2500 [ 90%] (Sampling)
## Chain 1, Iteration: 2500 / 2500 [100%] (Sampling)
## # Elapsed Time: 1.586 seconds (Warm-up)
## # 4.563 seconds (Sampling)
## # 6.149 seconds (Total)
##
##
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 2500 [ 0%] (Warmup)
## Chain 2, Iteration: 250 / 2500 [ 10%] (Warmup)
## Chain 2, Iteration: 500 / 2500 [ 20%] (Warmup)
## Chain 2, Iteration: 501 / 2500 [ 20%] (Sampling)
## Chain 2, Iteration: 750 / 2500 [ 30%] (Sampling)
## Chain 2, Iteration: 1000 / 2500 [ 40%] (Sampling)
## Chain 2, Iteration: 1250 / 2500 [ 50%] (Sampling)
## Chain 2, Iteration: 1500 / 2500 [ 60%] (Sampling)
## Chain 2, Iteration: 1750 / 2500 [ 70%] (Sampling)
## Chain 2, Iteration: 2000 / 2500 [ 80%] (Sampling)
## Chain 2, Iteration: 2250 / 2500 [ 90%] (Sampling)
## Chain 2, Iteration: 2500 / 2500 [100%] (Sampling)
## # Elapsed Time: 1.71 seconds (Warm-up)
## # 5.756 seconds (Sampling)
## # 7.466 seconds (Total)
##
##
## SAMPLING FOR MODEL 'pulled_left ~ dbinom(1, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
## Warning in map2stan(alist(pulled_left ~ dbinom(1, p), logit(p) <- a[actor] + : There were 10 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
## R code 10.15
unique( d$actor )
## [1] 1 2 3 4 5 6 7
## R code 10.16
precis( m10.4 , depth=2 )
## Warning in precis(m10.4, depth = 2): There were 10 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1] -0.75 0.28 -1.17 -0.29 2379 1
## a[2] 10.89 5.42 3.58 18.65 1057 1
## a[3] -1.05 0.29 -1.53 -0.62 2074 1
## a[4] -1.05 0.29 -1.49 -0.58 1583 1
## a[5] -0.74 0.27 -1.17 -0.32 2140 1
## a[6] 0.22 0.27 -0.20 0.64 2601 1
## a[7] 1.81 0.39 1.21 2.46 2603 1
## bp 0.84 0.27 0.40 1.28 1360 1
## bpC -0.14 0.31 -0.68 0.33 1775 1
## R code 10.17
post <- extract.samples( m10.4 )
str( post )
## List of 3
## $ a : num [1:4000, 1:7] -0.462 -0.66 -0.6 -0.423 -0.786 ...
## $ bp : num [1:4000(1d)] 0.335 1.012 1.159 1.046 0.352 ...
## $ bpC: num [1:4000(1d)] 0.398 -0.48 -0.483 -1.006 0.052 ...
## R code 10.18
dens( post$a[,2] )

## R code 10.19
chimp <- 1
d.pred <- list(
pulled_left = rep( 0 , 4 ), # empty outcome
prosoc_left = c(0,1,0,1), # right/left/right/left
condition = c(0,0,1,1), # control/control/partner/partner
actor = rep(chimp,4)
)
link.m10.4 <- link( m10.4 , data=d.pred )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
pred.p <- apply( link.m10.4 , 2 , mean )
pred.p.PI <- apply( link.m10.4 , 2 , PI )
plot( 0 , 0 , type="n" , xlab="prosoc_left/condition" ,
ylab="proportion pulled left" , ylim=c(0,1) , xaxt="n" ,
xlim=c(1,4) , yaxp=c(0,1,2) )
axis( 1 , at=1:4 , labels=c("0/0","1/0","0/1","1/1") )
mtext( paste( "actor" , chimp ) )
p <- by( d$pulled_left ,
list(d$prosoc_left,d$condition,d$actor) , mean )
lines( 1:4 , as.vector(p[,,chimp]) , col=rangi2 )
lines( 1:4 , pred.p )
shade( pred.p.PI , 1:4 )

## R code 10.20
data(chimpanzees)
d <- chimpanzees
d.aggregated <- aggregate( d$pulled_left ,
list(prosoc_left=d$prosoc_left,condition=d$condition,actor=d$actor) ,
sum )
head(d.aggregated)
## prosoc_left condition actor x
## 1 0 0 1 6
## 2 1 0 1 9
## 3 0 1 1 5
## 4 1 1 1 10
## 5 0 0 2 18
## 6 1 0 2 18
## R code 10.21
m10.5 <- map(
alist(
x ~ dbinom( 18 , p ) ,
logit(p) <- a + (bp + bpC*condition)*prosoc_left ,
a ~ dnorm(0,10) ,
bp ~ dnorm(0,10) ,
bpC ~ dnorm(0,10)
) ,
data=d.aggregated )
## R code 10.22
library(rethinking)
data(UCBadmit)
d <- UCBadmit
head(d)
## dept applicant.gender admit reject applications
## 1 A male 512 313 825
## 2 A female 89 19 108
## 3 B male 353 207 560
## 4 B female 17 8 25
## 5 C male 120 205 325
## 6 C female 202 391 593
## R code 10.23
d$male <- ifelse( d$applicant.gender=="male" , 1 , 0 )
m10.6 <- map(
alist(
admit ~ dbinom( applications , p ) ,
logit(p) <- a + bm*male ,
a ~ dnorm(0,10) ,
bm ~ dnorm(0,10)
) ,
data=d )
m10.7 <- map(
alist(
admit ~ dbinom( applications , p ) ,
logit(p) <- a ,
a ~ dnorm(0,10)
) ,
data=d )
## R code 10.24
compare( m10.6 , m10.7 )
## WAIC pWAIC dWAIC weight SE dSE
## m10.6 5954.8 2.0 0.0 1 34.92 NA
## m10.7 6046.5 1.1 91.7 0 29.88 19.16
## R code 10.25
precis(m10.6)
## Mean StdDev 5.5% 94.5%
## a -0.83 0.05 -0.91 -0.75
## bm 0.61 0.06 0.51 0.71
## R code 10.26
post <- extract.samples( m10.6 )
p.admit.male <- logistic( post$a + post$bm )
p.admit.female <- logistic( post$a )
diff.admit <- p.admit.male - p.admit.female
quantile( diff.admit , c(0.025,0.5,0.975) )
## 2.5% 50% 97.5%
## 0.1134991 0.1417073 0.1693617
## R code 10.27
postcheck( m10.6 , n=1e4 )
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
# draw lines connecting points from same dept
for ( i in 1:6 ) {
x <- 1 + 2*(i-1)
y1 <- d$admit[x]/d$applications[x]
y2 <- d$admit[x+1]/d$applications[x+1]
lines( c(x,x+1) , c(y1,y2) , col=rangi2 , lwd=2 )
text( x+0.5 , (y1+y2)/2 + 0.05 , d$dept[x] , cex=0.8 , col=rangi2 )
}

## R code 10.28
# make index
d$dept_id <- coerce_index( d$dept )
head(d)
## dept applicant.gender admit reject applications male dept_id
## 1 A male 512 313 825 1 1
## 2 A female 89 19 108 0 1
## 3 B male 353 207 560 1 2
## 4 B female 17 8 25 0 2
## 5 C male 120 205 325 1 3
## 6 C female 202 391 593 0 3
# model with unique intercept for each dept
m10.8 <- map(
alist(
admit ~ dbinom( applications , p ) ,
logit(p) <- a[dept_id] ,
a[dept_id] ~ dnorm(0,10)
) , data=d )
# model with male difference as well
m10.9 <- map(
alist(
admit ~ dbinom( applications , p ) ,
logit(p) <- a[dept_id] + bm*male ,
a[dept_id] ~ dnorm(0,10) ,
bm ~ dnorm(0,10)
) , data=d )
## R code 10.29
compare( m10.6 , m10.7 , m10.8 , m10.9 )
## WAIC pWAIC dWAIC weight SE dSE
## m10.8 5201.1 6.0 0.0 0.53 57.00 NA
## m10.9 5201.3 6.9 0.3 0.47 57.07 2.48
## m10.6 5954.9 2.0 753.8 0.00 35.05 48.56
## m10.7 6046.3 1.0 845.2 0.00 30.06 52.40
## R code 10.30
precis( m10.9 , depth=2 )
## Mean StdDev 5.5% 94.5%
## a[1] 0.68 0.10 0.52 0.84
## a[2] 0.64 0.12 0.45 0.82
## a[3] -0.58 0.07 -0.70 -0.46
## a[4] -0.61 0.09 -0.75 -0.48
## a[5] -1.06 0.10 -1.22 -0.90
## a[6] -2.62 0.16 -2.88 -2.37
## bm -0.10 0.08 -0.23 0.03
## R code 10.31
m10.9stan <- map2stan( m10.9 , chains=2 ,
iter=2500 , warmup=500 )
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
##
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 2500 [ 0%] (Warmup)
## Chain 1, Iteration: 250 / 2500 [ 10%] (Warmup)
## Chain 1, Iteration: 500 / 2500 [ 20%] (Warmup)
## Chain 1, Iteration: 501 / 2500 [ 20%] (Sampling)
## Chain 1, Iteration: 750 / 2500 [ 30%] (Sampling)
## Chain 1, Iteration: 1000 / 2500 [ 40%] (Sampling)
## Chain 1, Iteration: 1250 / 2500 [ 50%] (Sampling)
## Chain 1, Iteration: 1500 / 2500 [ 60%] (Sampling)
## Chain 1, Iteration: 1750 / 2500 [ 70%] (Sampling)
## Chain 1, Iteration: 2000 / 2500 [ 80%] (Sampling)
## Chain 1, Iteration: 2250 / 2500 [ 90%] (Sampling)
## Chain 1, Iteration: 2500 / 2500 [100%] (Sampling)
## # Elapsed Time: 0.048 seconds (Warm-up)
## # 0.172 seconds (Sampling)
## # 0.22 seconds (Total)
##
##
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 2500 [ 0%] (Warmup)
## Chain 2, Iteration: 250 / 2500 [ 10%] (Warmup)
## Chain 2, Iteration: 500 / 2500 [ 20%] (Warmup)
## Chain 2, Iteration: 501 / 2500 [ 20%] (Sampling)
## Chain 2, Iteration: 750 / 2500 [ 30%] (Sampling)
## Chain 2, Iteration: 1000 / 2500 [ 40%] (Sampling)
## Chain 2, Iteration: 1250 / 2500 [ 50%] (Sampling)
## Chain 2, Iteration: 1500 / 2500 [ 60%] (Sampling)
## Chain 2, Iteration: 1750 / 2500 [ 70%] (Sampling)
## Chain 2, Iteration: 2000 / 2500 [ 80%] (Sampling)
## Chain 2, Iteration: 2250 / 2500 [ 90%] (Sampling)
## Chain 2, Iteration: 2500 / 2500 [100%] (Sampling)
## # Elapsed Time: 0.046 seconds (Warm-up)
## # 0.157 seconds (Sampling)
## # 0.203 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
##
## SAMPLING FOR MODEL 'admit ~ dbinom(applications, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
precis(m10.9stan,depth=2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1] 0.68 0.10 0.52 0.84 1365 1
## a[2] 0.64 0.12 0.43 0.81 1355 1
## a[3] -0.58 0.07 -0.70 -0.46 2535 1
## a[4] -0.62 0.09 -0.75 -0.47 1870 1
## a[5] -1.06 0.10 -1.22 -0.90 2346 1
## a[6] -2.64 0.16 -2.88 -2.36 2388 1
## bm -0.10 0.08 -0.24 0.03 1038 1
## R code 10.32
m10.7glm <- glm( cbind(admit,reject) ~ 1 , data=d , family=binomial )
m10.6glm <- glm( cbind(admit,reject) ~ male , data=d , family=binomial )
m10.8glm <- glm( cbind(admit,reject) ~ dept , data=d , family=binomial )
m10.9glm <- glm( cbind(admit,reject) ~ male + dept , data=d ,
family=binomial )
## R code 10.33
data(chimpanzees)
m10.4glm <- glm(
pulled_left ~ as.factor(actor) + prosoc_left * condition - condition ,
data=chimpanzees , family=binomial )
## R code 10.34
glimmer( pulled_left ~ prosoc_left * condition - condition ,
data=chimpanzees , family=binomial )
## alist(
## pulled_left ~ dbinom( 1 , p ),
## logit(p) <- Intercept +
## b_prosoc_left*prosoc_left +
## b_prosoc_left_X_condition*prosoc_left_X_condition,
## Intercept ~ dnorm(0,10),
## b_prosoc_left ~ dnorm(0,10),
## b_prosoc_left_X_condition ~ dnorm(0,10)
## )
## R code 10.35
# outcome and predictor almost perfectly associated
y <- c( rep(0,10) , rep(1,10) )
x <- c( rep(-1,9) , rep(1,11) )
# fit binomial GLM
m.bad <- glm( y ~ x , data=list(y=y,x=x) ,
family=binomial )
precis(m.bad)
## Mean StdDev 5.5% 94.5%
## (Intercept) -9.13 2955.06 -4731.89 4713.63
## x 11.43 2955.06 -4711.33 4734.19
## R code 10.36
m.good <- map(
alist(
y ~ dbinom( 1 , p ),
logit(p) <- a + b*x,
c(a,b) ~ dnorm(0,10)
) , data=list(y=y,x=x) )
precis(m.good)
## Mean StdDev 5.5% 94.5%
## a -1.73 2.78 -6.16 2.71
## b 4.02 2.78 -0.42 8.45
## R code 10.37
m.good.stan <- map2stan( m.good )
##
## SAMPLING FOR MODEL 'y ~ dbinom(1, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1, Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1, Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1, Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1, Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%] (Sampling)
## # Elapsed Time: 0.095 seconds (Warm-up)
## # 0.093 seconds (Sampling)
## # 0.188 seconds (Total)
##
##
## SAMPLING FOR MODEL 'y ~ dbinom(1, p)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
pairs(m.good.stan)

## R code 10.38
y <- rbinom(1e5,1000,1/1000)
c( mean(y) , var(y) )
## [1] 1.002720 1.001003
## R code 10.39
library(rethinking)
data(Kline)
d <- Kline
#head(d)
## R code 10.40
d$log_pop <- log(d$population)
d$contact_high <- ifelse( d$contact=="high" , 1 , 0 )
head(d)
## culture population contact total_tools mean_TU log_pop contact_high
## 1 Malekula 1100 low 13 3.2 7.003065 0
## 2 Tikopia 1500 low 22 4.7 7.313220 0
## 3 Santa Cruz 3600 low 24 4.0 8.188689 0
## 4 Yap 4791 high 43 5.0 8.474494 1
## 5 Lau Fiji 7400 high 33 5.0 8.909235 1
## 6 Trobriand 8000 high 19 4.0 8.987197 1
## R code 10.41
m10.10 <- map(
alist(
total_tools ~ dpois( lambda ),
log(lambda) <- a + bp*log_pop +
bc*contact_high + bpc*contact_high*log_pop,
a ~ dnorm(0,100),
c(bp,bc,bpc) ~ dnorm(0,1)
),
data=d )
## R code 10.42
precis(m10.10,corr=TRUE)
## Mean StdDev 5.5% 94.5% a bp bc bpc
## a 0.94 0.36 0.37 1.52 1.00 -0.98 -0.13 0.07
## bp 0.26 0.03 0.21 0.32 -0.98 1.00 0.12 -0.08
## bc -0.09 0.84 -1.43 1.25 -0.13 0.12 1.00 -0.99
## bpc 0.04 0.09 -0.10 0.19 0.07 -0.08 -0.99 1.00
#plot(precis(m10.10))
## R code 10.43
post <- extract.samples(m10.10)
lambda_high <- exp( post$a + post$bc + (post$bp + post$bpc)*8 )
lambda_low <- exp( post$a + post$bp*8 )
## R code 10.44
diff <- lambda_high - lambda_low
sum(diff > 0)/length(diff)
## [1] 0.9541
## R code 10.45
# no interaction
m10.11 <- map(
alist(
total_tools ~ dpois( lambda ),
log(lambda) <- a + bp*log_pop + bc*contact_high,
a ~ dnorm(0,100),
c(bp,bc) ~ dnorm( 0 , 1 )
), data=d )
## R code 10.46
# no contact rate
m10.12 <- map(
alist(
total_tools ~ dpois( lambda ),
log(lambda) <- a + bp*log_pop,
a ~ dnorm(0,100),
bp ~ dnorm( 0 , 1 )
), data=d )
# no log-population
m10.13 <- map(
alist(
total_tools ~ dpois( lambda ),
log(lambda) <- a + bc*contact_high,
a ~ dnorm(0,100),
bc ~ dnorm( 0 , 1 )
), data=d )
## R code 10.47
# intercept only
m10.14 <- map(
alist(
total_tools ~ dpois( lambda ),
log(lambda) <- a,
a ~ dnorm(0,100)
), data=d )
# compare all using WAIC
# adding n=1e4 for more stable WAIC estimates
# will also plot the comparison
( islands.compare <- compare(m10.10,m10.11,m10.12,m10.13,m10.14,n=1e4) )
## WAIC pWAIC dWAIC weight SE dSE
## m10.11 79.1 4.2 0.0 0.62 11.14 NA
## m10.10 80.3 4.9 1.2 0.34 11.40 1.12
## m10.12 84.4 3.8 5.4 0.04 8.83 8.26
## m10.14 141.7 8.4 62.6 0.00 31.67 34.57
## m10.13 150.8 17.1 71.7 0.00 44.61 46.71
#plot(islands.compare)
## R code 10.48
# make plot of raw data to begin
# point character (pch) indicates contact rate
pch <- ifelse( d$contact_high==1 , 16 , 1 )
plot( d$log_pop , d$total_tools , col=rangi2 , pch=pch ,
xlab="log-population" , ylab="total tools" )
# sequence of log-population sizes to compute over
log_pop.seq <- seq( from=6 , to=13 , length.out=30 )
# compute trend for high contact islands
d.pred <- data.frame(
log_pop = log_pop.seq,
contact_high = 1
)
lambda.pred.h <- ensemble( m10.10 , m10.11 , m10.12 , data=d.pred )
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
lambda.med <- apply( lambda.pred.h$link , 2 , median )
lambda.PI <- apply( lambda.pred.h$link , 2 , PI )
# plot predicted trend for high contact islands
lines( log_pop.seq , lambda.med , col=rangi2 )
shade( lambda.PI , log_pop.seq , col=col.alpha(rangi2,0.2) )

# compute trend for low contact islands
d.pred <- data.frame(
log_pop = log_pop.seq,
contact_high = 0
)
}
lambda.pred.l <- ensemble( m10.10 , m10.11 , m10.12 , data=d.pred )
lambda.med <- apply( lambda.pred.l$link , 2 , median )
lambda.PI <- apply( lambda.pred.l$link , 2 , PI )
# plot again
lines( log_pop.seq , lambda.med , lty=2 )
shade( lambda.PI , log_pop.seq , col=col.alpha("black",0.1) )
## R code 10.49
m10.10stan <- map2stan( m10.10 , iter=3000 , warmup=1000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 1, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 1, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 1, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 1, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 1, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 1, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 1, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 1, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 1, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 1, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 1, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 0.267 seconds (Warm-up)
## # 0.629 seconds (Sampling)
## # 0.896 seconds (Total)
##
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 2, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 2, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 2, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 2, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 2, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 2, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 2, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 2, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 2, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 2, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 2, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 2.121 seconds (Warm-up)
## # 0.629 seconds (Sampling)
## # 2.75 seconds (Total)
##
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 3, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 3, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 3, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 3, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 3, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 3, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 3, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 3, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 3, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 3, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 3, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 2.096 seconds (Warm-up)
## # 0.501 seconds (Sampling)
## # 2.597 seconds (Total)
##
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 4, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 4, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 4, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 4, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 4, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 4, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 4, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 4, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 4, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 4, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 4, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 0.814 seconds (Warm-up)
## # 0.642 seconds (Sampling)
## # 1.456 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 800 / 8000 ]
[ 1600 / 8000 ]
[ 2400 / 8000 ]
[ 3200 / 8000 ]
[ 4000 / 8000 ]
[ 4800 / 8000 ]
[ 5600 / 8000 ]
[ 6400 / 8000 ]
[ 7200 / 8000 ]
[ 8000 / 8000 ]
precis(m10.10stan)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 0.94 0.36 0.38 1.51 1985 1
## bp 0.26 0.03 0.21 0.32 1976 1
## bc -0.10 0.86 -1.48 1.27 1998 1
## bpc 0.04 0.09 -0.11 0.20 2022 1
## R code 10.50
# construct centered predictor
d$log_pop_c <- d$log_pop - mean(d$log_pop)
# re-estimate
m10.10stan.c <- map2stan(
alist(
total_tools ~ dpois( lambda ) ,
log(lambda) <- a + bp*log_pop_c + bc*contact_high +
bcp*log_pop_c*contact_high ,
a ~ dnorm(0,10) ,
bp ~ dnorm(0,1) ,
bc ~ dnorm(0,1) ,
bcp ~ dnorm(0,1)
) ,
data=d , iter=3000 , warmup=1000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 1, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 1, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 1, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 1, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 1, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 1, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 1, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 1, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 1, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 1, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 1, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 0.063 seconds (Warm-up)
## # 0.093 seconds (Sampling)
## # 0.156 seconds (Total)
##
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 2, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 2, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 2, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 2, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 2, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 2, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 2, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 2, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 2, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 2, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 2, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 0.063 seconds (Warm-up)
## # 0.125 seconds (Sampling)
## # 0.188 seconds (Total)
##
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 3, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 3, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 3, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 3, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 3, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 3, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 3, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 3, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 3, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 3, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 3, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 0.062 seconds (Warm-up)
## # 0.11 seconds (Sampling)
## # 0.172 seconds (Total)
##
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 4, Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 4, Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 4, Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 4, Iteration: 1001 / 3000 [ 33%] (Sampling)
## Chain 4, Iteration: 1300 / 3000 [ 43%] (Sampling)
## Chain 4, Iteration: 1600 / 3000 [ 53%] (Sampling)
## Chain 4, Iteration: 1900 / 3000 [ 63%] (Sampling)
## Chain 4, Iteration: 2200 / 3000 [ 73%] (Sampling)
## Chain 4, Iteration: 2500 / 3000 [ 83%] (Sampling)
## Chain 4, Iteration: 2800 / 3000 [ 93%] (Sampling)
## Chain 4, Iteration: 3000 / 3000 [100%] (Sampling)
## # Elapsed Time: 0.063 seconds (Warm-up)
## # 0.093 seconds (Sampling)
## # 0.156 seconds (Total)
## Warning in FUN(X[[i]], ...): data with name culture is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name contact is not numeric and not
## used
##
## SAMPLING FOR MODEL 'total_tools ~ dpois(lambda)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 800 / 8000 ]
[ 1600 / 8000 ]
[ 2400 / 8000 ]
[ 3200 / 8000 ]
[ 4000 / 8000 ]
[ 4800 / 8000 ]
[ 5600 / 8000 ]
[ 6400 / 8000 ]
[ 7200 / 8000 ]
[ 8000 / 8000 ]
precis(m10.10stan.c)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 3.31 0.09 3.17 3.46 2715 1
## bp 0.26 0.04 0.21 0.32 3064 1
## bc 0.28 0.12 0.09 0.46 2641 1
## bcp 0.07 0.17 -0.19 0.35 3646 1
## R code 10.51
num_days <- 30
y <- rpois( num_days , 1.5 )
## R code 10.52
num_weeks <- 4
y_new <- rpois( num_weeks , 0.5*7 )
## R code 10.53
y_all <- c( y , y_new )
exposure <- c( rep(1,30) , rep(7,4) )
monastery <- c( rep(0,30) , rep(1,4) )
d <- data.frame( y=y_all , days=exposure , monastery=monastery )
## R code 10.54
# compute the offset
d$log_days <- log( d$days )
# fit the model
m10.15 <- map(
alist(
y ~ dpois( lambda ),
log(lambda) <- log_days + a + b*monastery,
a ~ dnorm(0,100),
b ~ dnorm(0,1)
),
data=d )
## R code 10.55
post <- extract.samples( m10.15 )
lambda_old <- exp( post$a )
lambda_new <- exp( post$a + post$b )
precis( data.frame( lambda_old , lambda_new ) )
## Mean StdDev |0.89 0.89|
## lambda_old 1.52 0.22 1.16 1.86
## lambda_new 0.59 0.15 0.35 0.79
## R code 10.56
# simulate career choices among 500 individuals
N <- 500 # number of individuals
income <- 1:3 # expected income of each career
score <- 0.5*income # scores for each career, based on income
# next line converts scores to probabilities
p <- softmax(score[1],score[2],score[3])
# now simulate choice
# outcome career holds event type values, not counts
career <- rep(NA,N) # empty vector of choices for each individual
# sample chosen career for each individual
for ( i in 1:N ) career[i] <- sample( 1:3 , size=1 , prob=p )
## R code 10.57
# fit the model, using dcategorical and softmax link
m10.16 <- map(
alist(
career ~ dcategorical( softmax(0,s2,s3) ),
s2 <- b*2, # linear model for event type 2
s3 <- b*3, # linear model for event type 3
b ~ dnorm(0,5)
) ,
data=list(career=career) )
## R code 10.58
N <- 100
# simulate family incomes for each individual
family_income <- runif(N)
# assign a unique coefficient for each type of event
b <- (1:-1)
career <- rep(NA,N) # empty vector of choices for each individual
for ( i in 1:N ) {
score <- 0.5*(1:3) + b*family_income[i]
p <- softmax(score[1],score[2],score[3])
career[i] <- sample( 1:3 , size=1 , prob=p )
}
m10.17 <- map(
alist(
career ~ dcategorical( softmax(0,s2,s3) ),
s2 <- a2 + b2*family_income,
s3 <- a3 + b3*family_income,
c(a2,a3,b2,b3) ~ dnorm(0,5)
) ,
data=list(career=career,family_income=family_income) )
## R code 10.59
library(rethinking)
data(UCBadmit)
d <- UCBadmit
head(d)
## dept applicant.gender admit reject applications
## 1 A male 512 313 825
## 2 A female 89 19 108
## 3 B male 353 207 560
## 4 B female 17 8 25
## 5 C male 120 205 325
## 6 C female 202 391 593
## R code 10.60
# binomial model of overall admission probability
m_binom <- map(
alist(
admit ~ dbinom(applications,p),
logit(p) <- a,
a ~ dnorm(0,100)
),
data=d )
# Poisson model of overall admission rate and rejection rate
d$rej <- d$reject # 'reject' is a reserved word
m_pois <- map2stan(
alist(
admit ~ dpois(lambda1),
rej ~ dpois(lambda2),
log(lambda1) <- a1,
log(lambda2) <- a2,
c(a1,a2) ~ dnorm(0,100)
),
data=d , chains=3 , cores=3 )
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## Precompiling model...
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
## the number of chains is less than 1; sampling not done
## Dispatching 3 chains to 3 cores.
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
##
## SAMPLING FOR MODEL 'admit ~ dpois(lambda1)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 300 / 3000 ]
[ 600 / 3000 ]
[ 900 / 3000 ]
[ 1200 / 3000 ]
[ 1500 / 3000 ]
[ 1800 / 3000 ]
[ 2100 / 3000 ]
[ 2400 / 3000 ]
[ 2700 / 3000 ]
[ 3000 / 3000 ]
## Warning in map2stan(alist(admit ~ dpois(lambda1), rej ~ dpois(lambda2), : There were 243 divergent iterations during sampling.
## Check the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.
## R code 10.61
logistic(coef(m_binom))
## a
## 1
## R code 10.62
k <- as.numeric(coef(m_pois))
exp(k[1])/(exp(k[1])+exp(k[2]))
## [1] 3.725674e-31
## R code 10.63
# simulate
N <- 100
x <- runif(N)
y <- rgeom( N , prob=logistic( -1 + 2*x ) )
# estimate
m10.18 <- map(
alist(
y ~ dgeom( p ),
logit(p) <- a + b*x,
a ~ dnorm(0,10),
b ~ dnorm(0,1)
),
data=list(y=y,x=x) )
precis(m10.18)
## Mean StdDev 5.5% 94.5%
## a -0.71 0.24 -1.09 -0.33
## b 1.44 0.46 0.71 2.17