## R code 11.1
library(rethinking)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: ggplot2
## rstan (Version 2.8.0, packaged: 2015-09-19 14:48:38 UTC, GitRev: 05c3d0058b6a)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.57)
data(Trolley)
d <- Trolley
## R code 11.2
simplehist( d$response , xlim=c(1,7) , xlab="response" )

## R code 11.3
# discrete proportion of each response value
pr_k <- table( d$response ) / nrow(d)
# cumsum converts to cumulative proportions
cum_pr_k <- cumsum( pr_k )
# plot
plot( 1:7 , cum_pr_k , type="b" , xlab="response" ,
ylab="cumulative proportion" , ylim=c(0,1) )

## R code 11.4
logit <- function(x) log(x/(1-x)) # convenience function
( lco <- logit( cum_pr_k ) )
## 1 2 3 4 5 6
## -1.9160912 -1.2666056 -0.7186340 0.2477857 0.8898637 1.7693809
## 7
## Inf
## R code 11.5
m11.1 <- map(
alist(
response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ),
phi <- 0,
c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
) ,
data=d ,
start=list(a1=-2,a2=-1,a3=0,a4=1,a5=2,a6=2.5) )
## R code 11.6
precis(m11.1)
## Mean StdDev 5.5% 94.5%
## a1 -1.92 0.03 -1.96 -1.87
## a2 -1.27 0.02 -1.31 -1.23
## a3 -0.72 0.02 -0.75 -0.68
## a4 0.25 0.02 0.22 0.28
## a5 0.89 0.02 0.85 0.93
## a6 1.77 0.03 1.72 1.81
## R code 11.7
logistic(coef(m11.1))
## a1 a2 a3 a4 a5 a6
## 0.1283005 0.2198398 0.3276948 0.5616311 0.7088609 0.8543786
## R code 11.8
# note that data with name 'case' not allowed in Stan
# so will pass pruned data list
m11.1stan <- map2stan(
alist(
response ~ dordlogit( phi , cutpoints ),
phi <- 0,
cutpoints ~ dnorm(0,10)
) ,
data=list(response=d$response),
start=list(cutpoints=c(-2,-1,0,1,2,2.5)) ,
chains=2 , cores=2 )
## Precompiling model...
## the number of chains is less than 1; sampling not done
## Dispatching 2 chains to 2 cores.
##
## SAMPLING FOR MODEL 'response ~ dordlogit(phi, cutpoints)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0.016 seconds (Sampling)
## # 0.016 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 200 / 2000 ]
[ 400 / 2000 ]
[ 600 / 2000 ]
[ 800 / 2000 ]
[ 1000 / 2000 ]
[ 1200 / 2000 ]
[ 1400 / 2000 ]
[ 1600 / 2000 ]
[ 1800 / 2000 ]
[ 2000 / 2000 ]
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning in log_sum_exp(ll): Reached total allocation of 1535Mb: see
## help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
## Warning: Reached total allocation of 1535Mb: see help(memory.size)
# need depth=2 to show vector of parameters
precis(m11.1stan,depth=2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## cutpoints[1] -1.92 0.03 -1.96 -1.87 1169 1
## cutpoints[2] -1.27 0.02 -1.31 -1.23 1551 1
## cutpoints[3] -0.72 0.02 -0.75 -0.69 2000 1
## cutpoints[4] 0.25 0.02 0.22 0.28 2000 1
## cutpoints[5] 0.89 0.02 0.85 0.93 2000 1
## cutpoints[6] 1.77 0.03 1.73 1.82 2000 1
## R code 11.9
( pk <- dordlogit( 1:7 , 0 , coef(m11.1) ) )
## [1] 0.12830051 0.09153931 0.10785502 0.23393627 0.14722982 0.14551766
## [7] 0.14562142
## R code 11.10
sum( pk*(1:7) )
## [1] 4.199294
## R code 11.11
( pk <- dordlogit( 1:7 , 0 , coef(m11.1)-0.5 ) )
## [1] 0.08195550 0.06401015 0.08221206 0.20910054 0.15897033 0.18438530
## [7] 0.21936612
## R code 11.12
sum( pk*(1:7) )
## [1] 4.72974
## R code 11.13
m11.2 <- map(
alist(
response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
phi <- bA*action + bI*intention + bC*contact,
c(bA,bI,bC) ~ dnorm(0,10),
c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
) ,
data=d ,
start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )
## R code 11.14
m11.3 <- map(
alist(
response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
phi <- bA*action + bI*intention + bC*contact +
bAI*action*intention + bCI*contact*intention ,
c(bA,bI,bC,bAI,bCI) ~ dnorm(0,10),
c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
) ,
data=d ,
start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )
## R code 11.15
coeftab(m11.1,m11.2,m11.3)
## m11.1 m11.2 m11.3
## a1 -1.92 -2.84 -2.63
## a2 -1.27 -2.16 -1.94
## a3 -0.72 -1.57 -1.34
## a4 0.25 -0.55 -0.31
## a5 0.89 0.12 0.36
## a6 1.77 1.02 1.27
## bA NA -0.71 -0.47
## bI NA -0.72 -0.28
## bC NA -0.96 -0.33
## bAI NA NA -0.45
## bCI NA NA -1.27
## nobs 9930 9930 9930
## R code 11.16
compare( m11.1 , m11.2 , m11.3 , refresh=0.1 )
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## WAIC pWAIC dWAIC weight SE dSE
## m11.3 36929.3 11.1 0.0 1 81.31 NA
## m11.2 37090.2 9.2 160.9 0 76.29 25.69
## m11.1 37854.5 6.0 925.2 0 57.61 62.64
## R code 11.17
post <- extract.samples( m11.3 )
## R code 11.18
plot( 1 , 1 , type="n" , xlab="intention" , ylab="probability" ,
xlim=c(0,1) , ylim=c(0,1) , xaxp=c(0,1,1) , yaxp=c(0,1,2) )
## R code 11.19
kA <- 0 # value for action
kC <- 1 # value for contact
kI <- 0:1 # values of intention to calculate over
for ( s in 1:100 ) {
p <- post[s,]
ak <- as.numeric(p[1:6])
phi <- p$bA*kA + p$bI*kI + p$bC*kC +
p$bAI*kA*kI + p$bCI*kC*kI
pk <- pordlogit( 1:6 , a=ak , phi=phi )
for ( i in 1:6 )
lines( kI , pk[,i] , col=col.alpha(rangi2,0.1) )
}
mtext( concat( "action=",kA,", contact=",kC ) )

## R code 11.20
# define parameters
prob_drink <- 0.2 # 20% of days
rate_work <- 1 # average 1 manuscript per day
# sample one year of production
N <- 365
# simulate days monks drink
drink <- rbinom( N , 1 , prob_drink )
# simulate manuscripts completed
y <- (1-drink)*rpois( N , rate_work )
## R code 11.21
simplehist( y , xlab="manuscripts completed" , lwd=4 )
zeros_drink <- sum(drink)
zeros_work <- sum(y==0 & drink==0)
zeros_total <- sum(y==0)
lines( c(0,0) , c(zeros_work,zeros_total) , lwd=4 , col=rangi2 )

## R code 11.22
m11.4 <- map(
alist(
y ~ dzipois( p , lambda ),
logit(p) <- ap,
log(lambda) <- al,
ap ~ dnorm(0,1),
al ~ dnorm(0,10)
) ,
data=list(y=y) )
precis(m11.4)
## Mean StdDev 5.5% 94.5%
## ap -1.28 0.31 -1.77 -0.79
## al 0.01 0.08 -0.12 0.15
## R code 11.23
logistic(-1.39) # probability drink
## [1] 0.1994078
exp(0.05) # rate finish manuscripts, when not drinking
## [1] 1.051271
## R code 11.24
dzip <- function( x , p , lambda , log=TRUE ) {
ll <- ifelse(
x==0 ,
p + (1-p)*exp(-lambda) ,
(1-p)*dpois(x,lambda,FALSE)
)
if ( log==TRUE ) ll <- log(ll)
return(ll)
}
## R code 11.25
pbar <- 0.5
theta <- 5
curve( dbeta2(x,pbar,theta) , from=0 , to=1 ,
xlab="probability" , ylab="Density" )

## R code 11.26
library(rethinking)
data(UCBadmit)
d <- UCBadmit
m11.5 <- map2stan(
alist(
admit ~ dbetabinom(applications,pbar,theta),
logit(pbar) <- a,
a ~ dnorm(0,2),
theta ~ dexp(1)
),
data=d,
constraints=list(theta="lower=0"),
start=list(theta=3),
iter=4000 , warmup=1000 , chains=2 , cores=2 )
## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## Precompiling model...
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
## the number of chains is less than 1; sampling not done
## Dispatching 2 chains to 2 cores.
## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used
##
## SAMPLING FOR MODEL 'admit ~ dbetabinom(applications, pbar, theta)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 600 / 6000 ]
[ 1200 / 6000 ]
[ 1800 / 6000 ]
[ 2400 / 6000 ]
[ 3000 / 6000 ]
[ 3600 / 6000 ]
[ 4200 / 6000 ]
[ 4800 / 6000 ]
[ 5400 / 6000 ]
[ 6000 / 6000 ]
## R code 11.27
precis(m11.5)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## theta 2.76 0.94 1.30 4.14 3009 1
## a -0.37 0.31 -0.88 0.12 3247 1
## R code 11.28
post <- extract.samples(m11.5)
quantile( logistic(post$a) , c(0.025,0.5,0.975) )
## 2.5% 50% 97.5%
## 0.2725638 0.4084137 0.5655752
## R code 11.29
post <- extract.samples(m11.5)
# draw posterior mean beta distribution
curve( dbeta2(x,mean(logistic(post$a)),mean(post$theta)) , from=0 , to=1 ,
ylab="Density" , xlab="probability admit", ylim=c(0,3) , lwd=2 )
# draw 100 beta distributions sampled from posterior
for ( i in 1:100 ) {
p <- logistic( post$a[i] )
theta <- post$theta[i]
curve( dbeta2(x,p,theta) , add=TRUE , col=col.alpha("black",0.2) )
}

## R code 11.30
postcheck(m11.5)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

## R code 11.31
mu <- 3
theta <- 1
curve( dgamma2(x,mu,theta) , from=0 , to=10 )

## R code 11.32
library(rethinking)
data(Hurricanes)
head(Hurricanes)
## name year deaths category min_pressure damage_norm female femininity
## 1 Easy 1950 2 3 960 1590 1 6.77778
## 2 King 1950 4 3 955 5350 0 1.38889
## 3 Able 1952 3 1 985 150 0 3.83333
## 4 Barbara 1953 1 1 987 58 1 9.83333
## 5 Florence 1953 0 1 985 15 1 8.33333
## 6 Carol 1954 60 3 960 19321 1 8.11111
###################