Untitled

## R code 11.1
library(rethinking)

## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: ggplot2
## rstan (Version 2.8.0, packaged: 2015-09-19 14:48:38 UTC, GitRev: 05c3d0058b6a)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.57)

data(Trolley)
d <- Trolley

## R code 11.2
simplehist( d$response , xlim=c(1,7) , xlab="response" )

## R code 11.3
# discrete proportion of each response value
pr_k <- table( d$response ) / nrow(d)

# cumsum converts to cumulative proportions
cum_pr_k <- cumsum( pr_k )

# plot
plot( 1:7 , cum_pr_k , type="b" , xlab="response" ,
      ylab="cumulative proportion" , ylim=c(0,1) )

## R code 11.4
logit <- function(x) log(x/(1-x)) # convenience function
( lco <- logit( cum_pr_k ) )

##          1          2          3          4          5          6 
## -1.9160912 -1.2666056 -0.7186340  0.2477857  0.8898637  1.7693809 
##          7 
##        Inf

## R code 11.5
m11.1 <- map(
  alist(
    response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ),
    phi <- 0,
    c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
  ) ,
  data=d ,
  start=list(a1=-2,a2=-1,a3=0,a4=1,a5=2,a6=2.5) )

## R code 11.6
precis(m11.1)

##     Mean StdDev  5.5% 94.5%
## a1 -1.92   0.03 -1.96 -1.87
## a2 -1.27   0.02 -1.31 -1.23
## a3 -0.72   0.02 -0.75 -0.68
## a4  0.25   0.02  0.22  0.28
## a5  0.89   0.02  0.85  0.93
## a6  1.77   0.03  1.72  1.81

## R code 11.7
logistic(coef(m11.1))

##        a1        a2        a3        a4        a5        a6 
## 0.1283005 0.2198398 0.3276948 0.5616311 0.7088609 0.8543786

## R code 11.8
# note that data with name 'case' not allowed in Stan
# so will pass pruned data list
m11.1stan <- map2stan(
  alist(
    response ~ dordlogit( phi , cutpoints ),
    phi <- 0,
    cutpoints ~ dnorm(0,10)
  ) ,
  data=list(response=d$response),
  start=list(cutpoints=c(-2,-1,0,1,2,2.5)) ,
  chains=2 , cores=2 )

## Precompiling model...
## the number of chains is less than 1; sampling not done
## Dispatching 2 chains to 2 cores.

## 
## SAMPLING FOR MODEL 'response ~ dordlogit(phi, cutpoints)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0.016 seconds (Sampling)
## #                0.016 seconds (Total)

## Computing WAIC
## Constructing posterior predictions

## [ 200 / 2000 ]
[ 400 / 2000 ]
[ 600 / 2000 ]
[ 800 / 2000 ]
[ 1000 / 2000 ]
[ 1200 / 2000 ]
[ 1400 / 2000 ]
[ 1600 / 2000 ]
[ 1800 / 2000 ]
[ 2000 / 2000 ]

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning in log_sum_exp(ll): Reached total allocation of 1535Mb: see
## help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

## Warning: Reached total allocation of 1535Mb: see help(memory.size)

# need depth=2 to show vector of parameters
precis(m11.1stan,depth=2)

##               Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## cutpoints[1] -1.92   0.03      -1.96      -1.87  1169    1
## cutpoints[2] -1.27   0.02      -1.31      -1.23  1551    1
## cutpoints[3] -0.72   0.02      -0.75      -0.69  2000    1
## cutpoints[4]  0.25   0.02       0.22       0.28  2000    1
## cutpoints[5]  0.89   0.02       0.85       0.93  2000    1
## cutpoints[6]  1.77   0.03       1.73       1.82  2000    1

## R code 11.9
( pk <- dordlogit( 1:7 , 0 , coef(m11.1) ) )

## [1] 0.12830051 0.09153931 0.10785502 0.23393627 0.14722982 0.14551766
## [7] 0.14562142

## R code 11.10
sum( pk*(1:7) )

## [1] 4.199294

## R code 11.11
( pk <- dordlogit( 1:7 , 0 , coef(m11.1)-0.5 ) )

## [1] 0.08195550 0.06401015 0.08221206 0.20910054 0.15897033 0.18438530
## [7] 0.21936612

## R code 11.12
sum( pk*(1:7) )

## [1] 4.72974

## R code 11.13
m11.2 <- map(
  alist(
    response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
    phi <- bA*action + bI*intention + bC*contact,
    c(bA,bI,bC) ~ dnorm(0,10),
    c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
  ) ,
  data=d ,
  start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )

## R code 11.14
m11.3 <- map(
  alist(
    response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
    phi <- bA*action + bI*intention + bC*contact +
      bAI*action*intention + bCI*contact*intention ,
    c(bA,bI,bC,bAI,bCI) ~ dnorm(0,10),
    c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
  ) ,
  data=d ,
  start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )

## R code 11.15
coeftab(m11.1,m11.2,m11.3)

##      m11.1   m11.2   m11.3  
## a1     -1.92   -2.84   -2.63
## a2     -1.27   -2.16   -1.94
## a3     -0.72   -1.57   -1.34
## a4      0.25   -0.55   -0.31
## a5      0.89    0.12    0.36
## a6      1.77    1.02    1.27
## bA        NA   -0.71   -0.47
## bI        NA   -0.72   -0.28
## bC        NA   -0.96   -0.33
## bAI       NA      NA   -0.45
## bCI       NA      NA   -1.27
## nobs    9930    9930    9930

## R code 11.16
compare( m11.1 , m11.2 , m11.3 , refresh=0.1 )

## Constructing posterior predictions

## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

## Constructing posterior predictions

## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

## Constructing posterior predictions

## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

##          WAIC pWAIC dWAIC weight    SE   dSE
## m11.3 36929.3  11.1   0.0      1 81.31    NA
## m11.2 37090.2   9.2 160.9      0 76.29 25.69
## m11.1 37854.5   6.0 925.2      0 57.61 62.64

## R code 11.17
post <- extract.samples( m11.3 )

## R code 11.18
plot( 1 , 1 , type="n" , xlab="intention" , ylab="probability" ,
      xlim=c(0,1) , ylim=c(0,1) , xaxp=c(0,1,1) , yaxp=c(0,1,2) )

## R code 11.19
kA <- 0     # value for action
kC <- 1     # value for contact
kI <- 0:1   # values of intention to calculate over
for ( s in 1:100 ) {
  p <- post[s,]
  ak <- as.numeric(p[1:6])
  phi <- p$bA*kA + p$bI*kI + p$bC*kC +
    p$bAI*kA*kI + p$bCI*kC*kI
  pk <- pordlogit( 1:6 , a=ak , phi=phi )
  for ( i in 1:6 )
    lines( kI , pk[,i] , col=col.alpha(rangi2,0.1) )
}
mtext( concat( "action=",kA,", contact=",kC ) )

## R code 11.20
# define parameters
prob_drink <- 0.2 # 20% of days
rate_work <- 1    # average 1 manuscript per day

# sample one year of production
N <- 365

# simulate days monks drink
drink <- rbinom( N , 1 , prob_drink )

# simulate manuscripts completed
y <- (1-drink)*rpois( N , rate_work )

## R code 11.21
simplehist( y , xlab="manuscripts completed" , lwd=4 )
zeros_drink <- sum(drink)
zeros_work <- sum(y==0 & drink==0)
zeros_total <- sum(y==0)
lines( c(0,0) , c(zeros_work,zeros_total) , lwd=4 , col=rangi2 )

## R code 11.22
m11.4 <- map(
  alist(
    y ~ dzipois( p , lambda ),
    logit(p) <- ap,
    log(lambda) <- al,
    ap ~ dnorm(0,1),
    al ~ dnorm(0,10)
  ) ,
  data=list(y=y) )
precis(m11.4)

##     Mean StdDev  5.5% 94.5%
## ap -1.28   0.31 -1.77 -0.79
## al  0.01   0.08 -0.12  0.15

## R code 11.23
logistic(-1.39) # probability drink

## [1] 0.1994078

exp(0.05)       # rate finish manuscripts, when not drinking

## [1] 1.051271

## R code 11.24
dzip <- function( x , p , lambda , log=TRUE ) {
  ll <- ifelse(
    x==0 ,
    p + (1-p)*exp(-lambda) ,
    (1-p)*dpois(x,lambda,FALSE)
  )
  if ( log==TRUE ) ll <- log(ll)
  return(ll)
}

## R code 11.25
pbar <- 0.5
theta <- 5
curve( dbeta2(x,pbar,theta) , from=0 , to=1 ,
       xlab="probability" , ylab="Density" )

## R code 11.26
library(rethinking)
data(UCBadmit)
d <- UCBadmit
m11.5 <- map2stan(
  alist(
    admit ~ dbetabinom(applications,pbar,theta),
    logit(pbar) <- a,
    a ~ dnorm(0,2),
    theta ~ dexp(1)
  ),
  data=d,
  constraints=list(theta="lower=0"),
  start=list(theta=3),
  iter=4000 , warmup=1000 , chains=2 , cores=2 )

## Warning: Variable 'applicant.gender' contains dots '.'.
## Will attempt to remove dots internally.
## Precompiling model...

## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used

## the number of chains is less than 1; sampling not done
## Dispatching 2 chains to 2 cores.

## Warning in FUN(X[[i]], ...): data with name dept is not numeric and not
## used

## Warning in FUN(X[[i]], ...): data with name applicant_gender is not numeric
## and not used

## 
## SAMPLING FOR MODEL 'admit ~ dbetabinom(applications, pbar, theta)' NOW (CHAIN 1).
## 
## Chain 1, Iteration: 1 / 1 [100%]  (Sampling)
## #  Elapsed Time: 0 seconds (Warm-up)
## #                0 seconds (Sampling)
## #                0 seconds (Total)

## Computing WAIC
## Constructing posterior predictions

## [ 600 / 6000 ]
[ 1200 / 6000 ]
[ 1800 / 6000 ]
[ 2400 / 6000 ]
[ 3000 / 6000 ]
[ 3600 / 6000 ]
[ 4200 / 6000 ]
[ 4800 / 6000 ]
[ 5400 / 6000 ]
[ 6000 / 6000 ]

## R code 11.27
precis(m11.5)

##        Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## theta  2.76   0.94       1.30       4.14  3009    1
## a     -0.37   0.31      -0.88       0.12  3247    1

## R code 11.28
post <- extract.samples(m11.5)
quantile( logistic(post$a) , c(0.025,0.5,0.975) )

##      2.5%       50%     97.5% 
## 0.2725638 0.4084137 0.5655752

## R code 11.29
post <- extract.samples(m11.5)

# draw posterior mean beta distribution
curve( dbeta2(x,mean(logistic(post$a)),mean(post$theta)) , from=0 , to=1 ,
       ylab="Density" , xlab="probability admit", ylim=c(0,3) , lwd=2 )

# draw 100 beta distributions sampled from posterior
for ( i in 1:100 ) {
  p <- logistic( post$a[i] )
  theta <- post$theta[i]
  curve( dbeta2(x,p,theta) , add=TRUE , col=col.alpha("black",0.2) )
}

## R code 11.30
postcheck(m11.5)

## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]

## R code 11.31
mu <- 3
theta <- 1
curve( dgamma2(x,mu,theta) , from=0 , to=10 )

## R code 11.32
library(rethinking)
data(Hurricanes)
head(Hurricanes)

##       name year deaths category min_pressure damage_norm female femininity
## 1     Easy 1950      2        3          960        1590      1    6.77778
## 2     King 1950      4        3          955        5350      0    1.38889
## 3     Able 1952      3        1          985         150      0    3.83333
## 4  Barbara 1953      1        1          987          58      1    9.83333
## 5 Florence 1953      0        1          985          15      1    8.33333
## 6    Carol 1954     60        3          960       19321      1    8.11111

###################