This chapter has been an informal introduction to Markov chain Monte Carlo (MCMC) estimation. The goal has been to introduce the purpose and approach of MCMC algorithms. The major algorithms introduced were Metropolis, Gibbs sampling, and Hamiltonian Monte Carlo; each has its advantages and disadvantages. The ulam function in the rethinking package was introduced. It uses the Stan (mc-stan.org) Hamiltonian Monte Carlo engine to fit models as they are defined in this book. General advice about diagnosing poor MCMC fits was illustrated with a couple of pathological examples.
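# Before the exercises, a minimal sketch of the Metropolis idea mentioned above
# (illustrative only, not the book's code): a random-walk sampler targeting a
# standard normal. The proposal scale `step` is an arbitrary choice.
metropolis <- function( n_samples=1e4 , step=0.5 ) {
  samples <- numeric(n_samples)
  current <- 0
  for ( i in 1:n_samples ) {
    proposal <- rnorm( 1 , mean=current , sd=step )
    # accept with probability min(1, target(proposal)/target(current));
    # runif(1) < ratio implements exactly that for a symmetric proposal
    if ( runif(1) < dnorm(proposal)/dnorm(current) ) current <- proposal
    samples[i] <- current
  }
  samples
}
# sanity check: the sample mean should be near 0 and the sd near 1
s <- metropolis()
c( mean(s) , sd(s) )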
Place each answer inside the code chunk (grey box). Each code chunk should contain a text response or code that completes or answers the question or activity requested. Make sure to include plots if the question requests them.
Finally, upon completion, name your final output .html file as: YourName_ANLY505-Year-Semester.html, publish the assignment to your RPubs account, and submit the link to Canvas. Each question is worth 5 points.
9-1. Re-estimate the terrain ruggedness model from the chapter, but now using a uniform prior for the standard deviation, sigma. The uniform prior should be dunif(0,1). Visualize the priors. Use ulam to estimate the posterior. Visualize the posteriors for both models. Does the different prior have any detectable influence on the posterior distribution of sigma? Why or why not?
# Note: All the ulam models in this assignment use chains=1 to avoid crashing the R session with a C stack error. I tried chains=4, cores=4, but could not resolve the C stack error.
library(rethinking)
data(rugged)
d <- rugged
d$log_gdp <- log(d$rgdppc_2000)
dd <- d[ complete.cases(d$rgdppc_2000) , ]
dd$log_gdp_std <- dd$log_gdp / mean(dd$log_gdp)
dd$rugged_std <- dd$rugged / max(dd$rugged)
dd$cid <- ifelse( dd$cont_africa==1 , 1 , 2 )
dat_slim <- list(
log_gdp_std = dd$log_gdp_std,
rugged_std = dd$rugged_std,
cid = as.integer( dd$cid )
)
str(dat_slim)
## List of 3
## $ log_gdp_std: num [1:170] 0.88 0.965 1.166 1.104 0.915 ...
## $ rugged_std : num [1:170] 0.138 0.553 0.124 0.125 0.433 ...
## $ cid : int [1:170] 1 2 2 2 2 2 2 2 2 1 ...
# (1) uniform prior for sigma, dunif(0,1)
m9.1_1 <- ulam(
alist(
log_gdp_std ~ dnorm( mu , sigma ) ,
mu <- a[cid] + b[cid]*( rugged_std - 0.215 ) ,
a[cid] ~ dnorm( 1 , 0.1 ) ,
b[cid] ~ dnorm( 0 , 0.3 ) ,
sigma ~ dunif(0,1)
) , data=dat_slim , chains=1 , cores=1)
##
## SAMPLING FOR MODEL 'af3946b6472f9e61b0f9cbd46122b12f' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.085 seconds (Warm-up)
## Chain 1: 0.051 seconds (Sampling)
## Chain 1: 0.136 seconds (Total)
## Chain 1:
precis( m9.1_1 , depth=2 )
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1] 0.8857747 0.015650061 0.862717603 0.91255483 779.1547 0.9979981
## a[2] 1.0503947 0.010158962 1.035156388 1.06612723 955.7061 0.9982247
## b[1] 0.1347120 0.071596850 0.009498244 0.24902191 402.5029 0.9991564
## b[2] -0.1413206 0.055275364 -0.227368569 -0.04796078 817.2856 0.9980232
## sigma 0.1117253 0.006679002 0.101489797 0.12310250 580.8096 0.9987925
# 1a) Visualize priors
par(mfrow=c(1,2))
set.seed(7)
prior <- extract.prior( m9.1_1 )
##
## SAMPLING FOR MODEL '0f664d06728d15560a09242da4f6c288' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.131 seconds (Warm-up)
## Chain 1: 0.092 seconds (Sampling)
## Chain 1: 0.223 seconds (Total)
## Chain 1:
for (s in 1:2){
d.A <- dd[ dd$cid==s , ]
plot( NULL , xlim=c(0,1) , ylim=c(0.5,1.5) ,
xlab="ruggedness" , ylab="log GDP" )
abline( h=min(d.A$log_gdp_std) , lty=2 )
abline( h=max(d.A$log_gdp_std) , lty=2 )
# draw 50 lines from the prior
rugged_seq <- seq( from=-0.1 , to=1.1 , length.out=30 )
mu <- link( m9.1_1 , post=prior , data=data.frame(cid= s, rugged_std=rugged_seq) )
for ( i in 1:50 ) lines( rugged_seq , mu[i,] , col=col.alpha("black",0.3) )
mtext(paste("Prior Check: cid = ",s ))
}
# 1b) Visualize the posteriors
par(mfrow=c(1,2))
set.seed(7)
for (s in 1:2){
d.A <- dd[ dd$cid==s , ]
plot( NULL , xlim=c(0,1) , ylim=c(0.5,1.5) ,
xlab="ruggedness" , ylab="log GDP" )
abline( h=min(d.A$log_gdp_std) , lty=2 )
abline( h=max(d.A$log_gdp_std) , lty=2 )
# draw 50 lines from the posterior
rugged_seq <- seq( from=-0.1 , to=1.1 , length.out=30 )
mu <- link( m9.1_1 , data=data.frame(cid= s, rugged_std=rugged_seq) )
for ( i in 1:50 ) lines( rugged_seq , mu[i,] , col=col.alpha("black",0.3) )
mtext(paste("Posterior Check: cid = ",s ))
}
# (2) exponential prior for sigma, dexp(1)
m9.1_2 <- ulam(
alist(
log_gdp_std ~ dnorm( mu , sigma ) ,
mu <- a[cid] + b[cid]*( rugged_std - 0.215 ) ,
a[cid] ~ dnorm( 1 , 0.1 ) ,
b[cid] ~ dnorm( 0 , 0.3 ) ,
sigma ~ dexp( 1 )
) , data=dat_slim , chains=1 , cores=1)
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.059 seconds (Warm-up)
## Chain 1: 0.039 seconds (Sampling)
## Chain 1: 0.098 seconds (Total)
## Chain 1:
precis( m9.1_2 , depth=2 )
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1] 0.8871430 0.015974796 0.86232170 0.91260410 515.3586 0.9997957
## a[2] 1.0503586 0.010315478 1.03423687 1.06786336 638.9067 0.9999904
## b[1] 0.1314894 0.071234469 0.01647382 0.24134616 608.1483 0.9980014
## b[2] -0.1423999 0.059206503 -0.23626667 -0.05060311 449.0679 0.9985078
## sigma 0.1119328 0.006679386 0.10172871 0.12292714 621.7797 0.9980180
# 2a) Visualize priors
par(mfrow=c(1,2))
set.seed(7)
prior <- extract.prior( m9.1_2 )
##
## SAMPLING FOR MODEL '6f32f00cf5a302d5fa05f2f490323961' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.069 seconds (Warm-up)
## Chain 1: 0.057 seconds (Sampling)
## Chain 1: 0.126 seconds (Total)
## Chain 1:
for (s in 1:2){
d.A <- dd[ dd$cid==s , ]
plot( NULL , xlim=c(0,1) , ylim=c(0.5,1.5) ,
xlab="ruggedness" , ylab="log GDP" )
abline( h=min(d.A$log_gdp_std) , lty=2 )
abline( h=max(d.A$log_gdp_std) , lty=2 )
# draw 50 lines from the prior
rugged_seq <- seq( from=-0.1 , to=1.1 , length.out=30 )
mu <- link( m9.1_2 , post=prior , data=data.frame(cid= s, rugged_std=rugged_seq) )
for ( i in 1:50 ) lines( rugged_seq , mu[i,] , col=col.alpha("black",0.3) )
mtext(paste("Prior Check: cid = ",s ))
}
# 2b) Visualize the posteriors
par(mfrow=c(1,2))
set.seed(7)
for (s in 1:2){
d.A <- dd[ dd$cid==s , ]
plot( NULL , xlim=c(0,1) , ylim=c(0.5,1.5) ,
xlab="ruggedness" , ylab="log GDP" )
abline( h=min(d.A$log_gdp_std) , lty=2 )
abline( h=max(d.A$log_gdp_std) , lty=2 )
# draw 50 lines from the posterior
rugged_seq <- seq( from=-0.1 , to=1.1 , length.out=30 )
mu <- link( m9.1_2 , data=data.frame(cid= s, rugged_std=rugged_seq) )
for ( i in 1:50 ) lines( rugged_seq , mu[i,] , col=col.alpha("black",0.3) )
mtext(paste("Posterior Check: cid = ",s ))
}
# If we only focus on the prior and posterior of sigma:
# Sigma Prior Visualization:
par(mfrow=c(1,1))
curve( dunif(x,0,1) , from=0 , to=6 , xlab="sigma" , ylab="Density" , ylim=c(0,1) )
curve( dexp(x,1) , add=TRUE , col="blue" )
# Sigma Posterior Visualization:
sigma_unif <- extract.samples(m9.1_1,pars="sigma")
sigma_exp <- extract.samples(m9.1_2,pars="sigma")
dens( sigma_unif[[1]] , xlab="sigma" )
dens( sigma_exp[[1]] , add=TRUE , col="blue" )
# From the parameter estimates and the overlaid posterior densities, the different priors have no detectable influence on the posterior distribution of sigma: both models put the posterior mean near 0.11 with nearly identical intervals. With this much data the likelihood dominates, so the choice of prior matters little as long as it does not exclude the plausible region, and both dunif(0,1) and dexp(1) cover it.
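# A numeric check of the visual impression above (a quick sketch, not required
# by the question): summarize the two sigma posteriors side by side.
s_u <- as.numeric( sigma_unif[[1]] )
s_e <- as.numeric( sigma_exp[[1]] )
rbind( unif = c( mean=mean(s_u) , sd=sd(s_u) ) ,
       exp  = c( mean=mean(s_e) , sd=sd(s_e) ) )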
9-2. Modify the terrain ruggedness model again. This time, change the prior for b[cid] to dexp(0.3). What does this do to the posterior distribution? Can you explain it?
m9.2 <- ulam(
alist(
log_gdp_std ~ dnorm( mu , sigma ) ,
mu <- a[cid] + b[cid]* (rugged_std-0.215) ,
a[cid] ~ dnorm(1,0.1),
b[cid] ~ dexp(0.3),
sigma ~ dexp(1)
) ,
data=dat_slim , chains=1 , cores=1)
##
## SAMPLING FOR MODEL 'ab234e9a92bfbd2bfb200c36f51328eb' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.215 seconds (Warm-up)
## Chain 1: 0.075 seconds (Sampling)
## Chain 1: 0.29 seconds (Total)
## Chain 1:
precis( m9.2 , depth=2 )
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1] 0.88676161 0.015986009 0.8593747566 0.91261470 230.6341 0.9981083
## a[2] 1.04824357 0.011420060 1.0296768421 1.06580210 377.9899 0.9980181
## b[1] 0.14355597 0.072648276 0.0214066851 0.26372752 140.4705 0.9986387
## b[2] 0.01838095 0.018203857 0.0009832437 0.05634543 536.6898 0.9987021
## sigma 0.11450681 0.006636296 0.1036289880 0.12554443 307.0812 1.0000757
# Posterior simulation
par(mfrow=c(1,2))
set.seed(7)
for (s in 1:2){
d.A <- dd[ dd$cid==s , ]
plot( NULL , xlim=c(0,1) , ylim=c(0.5,1.5) ,
xlab="ruggedness" , ylab="log GDP" )
abline( h=min(d.A$log_gdp_std) , lty=2 )
abline( h=max(d.A$log_gdp_std) , lty=2 )
rugged_seq <- seq( from=-0.1 , to=1.1 , length.out=30 )
mu <- link( m9.2 , data=data.frame(cid= s, rugged_std=rugged_seq) )
for ( i in 1:50 ) lines( rugged_seq , mu[i,] , col=col.alpha("black",0.3) )
mtext(paste("Posterior Check: cid = ",s ))
}
# If we only focus on the posterior of b:
# b Posterior Visualization:
b_norm <- extract.samples(m9.1_2,pars="b")
b_exp <- extract.samples(m9.2,pars="b")
par(mfrow=c(1,2))
dens( b_norm[[1]][,1] , xlab="b[cid=1]" )
dens( b_exp[[1]][,1] , add=TRUE , col="blue" )
dens( b_norm[[1]][,2] , xlab="b[cid=2]" ,xlim=c(-0.4,0.2), ylim=c(0,8) )
dens( b_exp[[1]][,2] , add=TRUE , col="blue" )
# Now we observe a clear difference in the b parameters. Black curves show the posterior under the prior b[cid] ~ dnorm(0,0.3); blue curves show the posterior under b[cid] ~ dexp(0.3). The normal prior allows b to take both positive and negative values, but the exponential prior has support only on positive values, so it forces b to be positive. For b[1] (Africa) this matters little, since the data already favor a positive slope. For b[2] (outside Africa) the data favor a negative slope, so the posterior gets squeezed up against zero: its mean moves from about -0.14 under the normal prior to about 0.02 under the exponential prior.
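# To see why, overlay the two priors for b (an optional sketch). dnorm(0,0.3)
# is symmetric around zero, while dexp(0.3) puts all of its mass on b > 0.
par(mfrow=c(1,1))
curve( dnorm(x,0,0.3) , from=-1 , to=2 , xlab="b" , ylab="Density" )
curve( dexp(x,0.3) , add=TRUE , col="blue" )
legend( "topright" , lty=1 , col=c("black","blue") ,
        legend=c("dnorm(0, 0.3)","dexp(0.3)") , bty="n" )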
9-3. Re-estimate one of the Stan models from the chapter, but at different numbers of warmup iterations. Be sure to use the same number of sampling iterations in each case. Compare the n_eff values. How much warmup is enough?
m9.3 <- ulam(
alist(
log_gdp_std ~ dnorm( mu , sigma ) ,
mu <- a[cid] + b[cid]*( rugged_std - 0.215 ) ,
a[cid] ~ dnorm( 1 , 0.1 ) ,
b[cid] ~ dnorm( 0 , 0.3 ) ,
sigma ~ dexp( 1 )
) , data=dat_slim , chains=1, cores=1 )
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.041 seconds (Warm-up)
## Chain 1: 0.028 seconds (Sampling)
## Chain 1: 0.069 seconds (Total)
## Chain 1:
# define warmup values to run through
warm_list <- c(5,10,100,500,1000)
# first make matrix to hold n_eff results
n_eff <- matrix( NA , nrow=length(warm_list) , ncol=5 )
# loop over warm_list and collect n_eff
for ( i in 1:length(warm_list) ) {
w <- warm_list[i]
m_temp <- ulam(m9.3, chains=1, cores=1, iter=1000+w , warmup=w)
n_eff[i,] <- precis(m_temp, depth=2)$n_eff
}
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: WARNING: No variance estimation is
## Chain 1: performed for num_warmup < 20
## Chain 1:
## Chain 1: Iteration: 1 / 1005 [ 0%] (Warmup)
## Chain 1: Iteration: 6 / 1005 [ 0%] (Sampling)
## Chain 1: Iteration: 105 / 1005 [ 10%] (Sampling)
## Chain 1: Iteration: 205 / 1005 [ 20%] (Sampling)
## Chain 1: Iteration: 305 / 1005 [ 30%] (Sampling)
## Chain 1: Iteration: 405 / 1005 [ 40%] (Sampling)
## Chain 1: Iteration: 505 / 1005 [ 50%] (Sampling)
## Chain 1: Iteration: 605 / 1005 [ 60%] (Sampling)
## Chain 1: Iteration: 705 / 1005 [ 70%] (Sampling)
## Chain 1: Iteration: 805 / 1005 [ 80%] (Sampling)
## Chain 1: Iteration: 905 / 1005 [ 90%] (Sampling)
## Chain 1: Iteration: 1005 / 1005 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0 seconds (Warm-up)
## Chain 1: 0.019 seconds (Sampling)
## Chain 1: 0.019 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: WARNING: No variance estimation is
## Chain 1: performed for num_warmup < 20
## Chain 1:
## Chain 1: Iteration: 1 / 1010 [ 0%] (Warmup)
## Chain 1: Iteration: 11 / 1010 [ 1%] (Sampling)
## Chain 1: Iteration: 111 / 1010 [ 10%] (Sampling)
## Chain 1: Iteration: 212 / 1010 [ 20%] (Sampling)
## Chain 1: Iteration: 313 / 1010 [ 30%] (Sampling)
## Chain 1: Iteration: 414 / 1010 [ 40%] (Sampling)
## Chain 1: Iteration: 515 / 1010 [ 50%] (Sampling)
## Chain 1: Iteration: 616 / 1010 [ 60%] (Sampling)
## Chain 1: Iteration: 717 / 1010 [ 70%] (Sampling)
## Chain 1: Iteration: 818 / 1010 [ 80%] (Sampling)
## Chain 1: Iteration: 919 / 1010 [ 90%] (Sampling)
## Chain 1: Iteration: 1010 / 1010 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.001 seconds (Warm-up)
## Chain 1: 0.109 seconds (Sampling)
## Chain 1: 0.11 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: WARNING: There aren't enough warmup iterations to fit the
## Chain 1: three stages of adaptation as currently configured.
## Chain 1: Reducing each adaptation stage to 15%/75%/10% of
## Chain 1: the given number of warmup iterations:
## Chain 1: init_buffer = 15
## Chain 1: adapt_window = 75
## Chain 1: term_buffer = 10
## Chain 1:
## Chain 1: Iteration: 1 / 1100 [ 0%] (Warmup)
## Chain 1: Iteration: 101 / 1100 [ 9%] (Sampling)
## Chain 1: Iteration: 210 / 1100 [ 19%] (Sampling)
## Chain 1: Iteration: 320 / 1100 [ 29%] (Sampling)
## Chain 1: Iteration: 430 / 1100 [ 39%] (Sampling)
## Chain 1: Iteration: 540 / 1100 [ 49%] (Sampling)
## Chain 1: Iteration: 650 / 1100 [ 59%] (Sampling)
## Chain 1: Iteration: 760 / 1100 [ 69%] (Sampling)
## Chain 1: Iteration: 870 / 1100 [ 79%] (Sampling)
## Chain 1: Iteration: 980 / 1100 [ 89%] (Sampling)
## Chain 1: Iteration: 1090 / 1100 [ 99%] (Sampling)
## Chain 1: Iteration: 1100 / 1100 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.015 seconds (Warm-up)
## Chain 1: 0.165 seconds (Sampling)
## Chain 1: 0.18 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 1: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 1: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 1: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 1: Iteration: 501 / 1500 [ 33%] (Sampling)
## Chain 1: Iteration: 650 / 1500 [ 43%] (Sampling)
## Chain 1: Iteration: 800 / 1500 [ 53%] (Sampling)
## Chain 1: Iteration: 950 / 1500 [ 63%] (Sampling)
## Chain 1: Iteration: 1100 / 1500 [ 73%] (Sampling)
## Chain 1: Iteration: 1250 / 1500 [ 83%] (Sampling)
## Chain 1: Iteration: 1400 / 1500 [ 93%] (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.039 seconds (Warm-up)
## Chain 1: 0.051 seconds (Sampling)
## Chain 1: 0.09 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '4e638bc82c4247257e76c1202e0a46c7' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.067 seconds (Warm-up)
## Chain 1: 0.053 seconds (Sampling)
## Chain 1: 0.12 seconds (Total)
## Chain 1:
colnames(n_eff) <- rownames(precis(m_temp, depth=2))
rownames(n_eff) <- warm_list
n_eff # rows show warmup values, columns show parameters, cells show n_eff
## a[1] a[2] b[1] b[2] sigma
## 5 8.159548 40.66774 3.309788 2.646515 3.489171
## 10 861.584546 982.82464 493.802948 705.948025 664.036611
## 100 1156.817487 1156.46586 528.895892 653.398825 537.583471
## 500 1162.383382 1363.65278 1131.865247 1517.087335 948.677178
## 1000 1178.021531 1342.24688 1609.622178 1591.467686 1384.660670
# From the results above, the effective number of samples improves as the number of warmup iterations increases. But the gains level off quickly: with only 5 warmup iterations the chains are nearly useless (n_eff in the single digits), yet by 10 warmup iterations n_eff is already in the hundreds for every parameter. For this simple model, anything beyond a few hundred warmup iterations appears to be more than enough.
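# A quick visual version of the table above (an optional sketch): n_eff for
# each parameter as a function of warmup length, on a log-scaled x axis.
par(mfrow=c(1,1))
matplot( warm_list , n_eff , type="b" , log="x" , pch=16 , lty=1 ,
         xlab="warmup iterations (log scale)" , ylab="n_eff" )
legend( "bottomright" , pch=16 , col=1:5 , legend=colnames(n_eff) , bty="n" )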
9-4. Run the model below and then inspect the posterior distribution and explain what it is accomplishing.
mp <- ulam(
alist(
a ~ dnorm(0,1),
b ~ dcauchy(0,1)
), data=list(y=1) , chains=1 )
##
## SAMPLING FOR MODEL 'bcf56ee89f6cf2a4224a4139ff01c7d4' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.017 seconds (Warm-up)
## Chain 1: 0.023 seconds (Sampling)
## Chain 1: 0.04 seconds (Total)
## Chain 1:
## The model above has no likelihood: y is never used, and a and b appear only in their prior statements. So Stan is simply sampling from the priors, Normal(0,1) for a and Cauchy(0,1) for b.
# Posterior Visualization from the samples
set.seed(7)
post <- extract.samples(mp)
par(mfrow = c(1, 2))
dens(post$a)
curve(dnorm(x, 0, 1), from = -4, to = 4, add = TRUE, lty = 2)
legend("topright", lty = c(1, 2), legend = c("Sample", "Exact density"), bty = "n")
mtext("Normal")
dens(post$b, xlim = c(-10, 10))
curve(dcauchy(x, 0, 1), from = -10, to = 10, add = TRUE, lty = 2)
#legend("topright", lty = c(1, 2), legend = c("Sample", "Exact density"), bty = "n")
mtext("Cauchy")
Compare the samples for the parameters a and b. Can you explain the different trace plots? If you are unfamiliar with the Cauchy distribution, you should look it up. The key feature to attend to is that it has no expected value. Can you connect this fact to the trace plot?
precis(mp)
## mean sd 5.5% 94.5% n_eff Rhat4
## a 0.01239219 1.014895 -1.634049 1.639670 112.0992 1.0255658
## b 0.07308763 8.483088 -6.938399 5.451035 120.5750 0.9987584
# Comparing the parameters a and b, b has a much larger sd (about 8.5) relative to the magnitude of its mean, reflecting the Cauchy's heavy tails. Both parameters also have low effective sample sizes (only around 110-120 out of 500 samples), a sign that this chain mixes poorly.
traceplot(mp, n_col=1)
## [1] 1000
## [1] 1
## [1] 1000
# The trace for b has some big spikes in it. That is a signature of the Cauchy distribution: its tails are so thick that extreme values remain fairly probable, so the chain occasionally jumps to a value far from the bulk of the distribution. And because the Cauchy has no expected value, those extreme draws never average out; the spikes keep appearing no matter how long the chain runs. The trace plot for a, on the other hand, has the typical shape of a well-mixed Gaussian chain.
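# To connect the spikes to the missing expected value (an illustrative sketch,
# not part of the assignment): the running mean of Cauchy draws never settles,
# while the running mean of Normal draws converges quickly.
set.seed(7)
n <- 1e4
run_mean <- function(x) cumsum(x) / seq_along(x)
par(mfrow=c(1,2))
plot( run_mean(rnorm(n)) , type="l" , xlab="draw" , ylab="running mean" )
abline( h=0 , lty=2 ) ; mtext("Normal(0,1)")
plot( run_mean(rcauchy(n)) , type="l" , xlab="draw" , ylab="running mean" )
abline( h=0 , lty=2 ) ; mtext("Cauchy(0,1)")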
9-5. Recall the divorce rate example from Chapter 5. Repeat that analysis, using ulam this time, fitting models m5.1, m5.2, and m5.3. Use compare to compare the models on the basis of WAIC or PSIS. To use WAIC or PSIS with ulam, you need to add the argument log_lik=TRUE. Explain the model comparison results.
# load data and copy
library(rethinking)
data(WaffleDivorce)
d <- WaffleDivorce
# standardize variables
d$D <- standardize( d$Divorce )
d$M <- standardize( d$Marriage )
d$A <- standardize( d$MedianAgeMarriage )
d_trim <- list(D = d$D, M = d$M, A = d$A)
m5.1 <- ulam(
alist(
D ~ dnorm( mu , sigma ) ,
mu <- a + bA * A ,
a ~ dnorm( 0 , 0.2 ) ,
bA ~ dnorm( 0 , 0.5 ) ,
sigma ~ dexp( 1 )
) , data = d_trim, chains = 1, cores=1, log_lik=TRUE )
##
## SAMPLING FOR MODEL '5605ea5f0769a3b35ef3557a0659786d' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.019 seconds (Warm-up)
## Chain 1: 0.016 seconds (Sampling)
## Chain 1: 0.035 seconds (Total)
## Chain 1:
m5.2 <- ulam(
alist(
D ~ dnorm( mu , sigma ) ,
mu <- a + bM * M ,
a ~ dnorm( 0 , 0.2 ) ,
bM ~ dnorm( 0 , 0.5 ) ,
sigma ~ dexp( 1 )
) , data = d_trim, chains = 1, cores=1, log_lik=TRUE )
##
## SAMPLING FOR MODEL '5562557d1eb945f85e38992afb87f838' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.018 seconds (Warm-up)
## Chain 1: 0.017 seconds (Sampling)
## Chain 1: 0.035 seconds (Total)
## Chain 1:
m5.3 <- ulam(
alist(
D ~ dnorm( mu , sigma ) ,
mu <- a + bM*M + bA*A ,
a ~ dnorm( 0 , 0.2 ) ,
bM ~ dnorm( 0 , 0.5 ) ,
bA ~ dnorm( 0 , 0.5 ) ,
sigma ~ dexp( 1 )
) , data = d_trim, chains = 1, cores=1, log_lik=TRUE )
##
## SAMPLING FOR MODEL '3fea1cdc1f8786451ed127638cb7b165' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1: Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.034 seconds (Warm-up)
## Chain 1: 0.032 seconds (Sampling)
## Chain 1: 0.066 seconds (Total)
## Chain 1:
compare(m5.1, m5.2, m5.3, func = PSIS)
## PSIS SE dPSIS dSE pPSIS weight
## m5.1 126.4464 13.350393 0.0000000 NA 4.076947 0.5771568315
## m5.3 127.0717 12.400778 0.6253072 1.345728 4.439545 0.4221921009
## m5.2 140.0210 9.896579 13.5745117 9.790636 3.272813 0.0006510676
compare(m5.1, m5.2, m5.3, func = WAIC)
## WAIC SE dWAIC dSE pWAIC weight
## m5.1 126.5186 13.302779 0.0000000 NA 4.113021 0.5750323683
## m5.3 127.1268 12.354749 0.6081891 1.345549 4.467060 0.4242537583
## m5.2 139.9015 9.751848 13.3829521 9.889055 3.213107 0.0007138733
## By both WAIC and PSIS, model m5.1, with only median age at marriage as a predictor, ranks best, but it is not really distinguishable from m5.3: the difference of about 0.6 is much smaller than its standard error (about 1.3). The model with marriage rate only, m5.2, clearly performs worse than both.
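# A graphical version of the comparison (an optional sketch; recent versions
# of rethinking provide a plot method for compare() results).
plot( compare( m5.1 , m5.2 , m5.3 ) )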
## Then we check the parameters of m5.3:
precis(m5.3)
## mean sd 5.5% 94.5% n_eff Rhat4
## a 0.003446968 0.10577945 -0.1656734 0.1711785 419.8617 0.9982722
## bM -0.057681781 0.16206387 -0.3102394 0.2014195 309.3541 0.9980013
## bA -0.608000598 0.15349657 -0.8398653 -0.3547389 301.7649 0.9981721
## sigma 0.832185773 0.08393644 0.7016160 0.9732231 403.8810 0.9999053
# While m5.3 also includes marriage rate as a predictor, it estimates very little expected influence for it, with substantial uncertainty even about the direction of that influence: the mean of bM is close to 0 and its sd is much larger than the magnitude of the mean, so the interval spans both sides of 0. We also see that the WAIC penalty (pWAIC) for m5.3 is slightly larger than for m5.1, reflecting the extra parameter. In short, once median age at marriage is in the model, marriage rate adds nothing, and m5.3 and m5.1 make practically the same predictions.
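# One way to see that m5.1 and m5.3 make practically the same predictions (an
# optional sketch): compare their posterior mean predictions of D for the
# observed states. The points should fall almost exactly on the y = x line.
par(mfrow=c(1,1))
mu1 <- apply( link(m5.1) , 2 , mean )
mu3 <- apply( link(m5.3) , 2 , mean )
plot( mu1 , mu3 , xlab="m5.1 predicted D" , ylab="m5.3 predicted D" )
abline( a=0 , b=1 , lty=2 )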