Easy

13E1

alist(y~dnorm(mu,sigma),
      mu= a[group] + b[group]*x,
      c(a,b)[group] ~ dmvnorm2(c(a,b), sigma_group, Rho),
      c(a,b) ~ dnorm(0,10),
      sigma_group ~ dcauchy(0,2)
      Rho~dlkjcorr(2),
      sigma ~ dchauchy(0,2)
)

13E2

Rich people tend to have higher income. If you would predict savings in a few years based on savings now, people with more savings at baseline would probably also have a steeper increase in savings, due to compound interest etc.

13E3

Medium

13M1

Simulate the data like in the book:

a = 3.5            # average morning wait time
b = -1             # average difference afternoon wait time
sigma_a = 1        # std dev in intercepts
sigma_b = 0.5      # std dev in slopes

rho <- (0)          # correlation between intercepts and slopes is now zero

## R code 13.2
Mu <- c( a , b )

## R code 13.3
cov_ab <- sigma_a*sigma_b*rho
Sigma <- matrix( c(sigma_a^2,cov_ab,cov_ab,sigma_b^2) , ncol=2 )

## R code 13.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix

# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)

## R code 13.6
N_cafes <- 20

## R code 13.7
library(MASS)
set.seed(10) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )

## R code 13.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]

## R code 13.9
plot( a_cafe , b_cafe , col=rangi2 ,
      xlab="intercepts (a_cafe)" , ylab="slopes (b_cafe)" )

# overlay population distribution
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
  lines(ellipse(Sigma,centre=Mu,level=l),col=col.alpha("black",0.2))

## R code 13.10
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5  # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )

Fit the model and inspect posterior distribution for correlation:

mod.file13.1="mod13_1.Rdata"
if(file.exists(mod.file13.1)){load(mod.file13.1)}else{
  m13.1 <- map2stan(
    alist(
      wait ~ dnorm( mu , sigma ),
      mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
      c(a_cafe,b_cafe)[cafe] ~ dmvnorm2(c(a,b),sigma_cafe,Rho),
      a ~ dnorm(0,10),
      b ~ dnorm(0,10),
      sigma_cafe ~ dcauchy(0,2),
      sigma ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=d ,
    iter=5000 , warmup=2000 , chains=2 )
  save(m13.1, file=mod.file13.1)
}
## R code 13.13
post <- extract.samples(m13.1)
dens( post$Rho[,1,2] )

The predicted Rho is not centered around 0. It is also quite sensitive to a different seed (not shown).

Increase the amount of data to see if this matters:

## R code 13.10
N_visits <- 100
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5  # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )

mod.file13.1.2="mod13_1_2.Rdata"
if(file.exists(mod.file13.1.2)){load(mod.file13.1.2)}else{
  
  m13.1 <- map2stan(
    alist(
      wait ~ dnorm( mu , sigma ),
      mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
      c(a_cafe,b_cafe)[cafe] ~ dmvnorm2(c(a,b),sigma_cafe,Rho),
      a ~ dnorm(0,10),
      b ~ dnorm(0,10),
      sigma_cafe ~ dcauchy(0,2),
      sigma ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=d ,
    iter=4000 , warmup=1000 , chains=3, cores=3)
  save(m13.1,file=mod.file13.1.2)
}
## R code 13.13
post <- extract.samples(m13.1)
dens( post$Rho[,1,2] )

Now it is much more accurate.

13M2

Simulate the original data again

a = 3.5            # average morning wait time
b = -1             # average difference afternoon wait time
sigma_a = 1        # std dev in intercepts
sigma_b = 0.5      # std dev in slopes

rho = -0.7         # correlation between intercepts and slopes

## R code 13.2
Mu <- c( a , b )

## R code 13.3
cov_ab <- sigma_a*sigma_b*rho
Sigma <- matrix( c(sigma_a^2,cov_ab,cov_ab,sigma_b^2) , ncol=2 )

## R code 13.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix

# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)

## R code 13.6
N_cafes <- 20

## R code 13.7
library(MASS)
set.seed(10) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )

## R code 13.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]

## R code 13.9
plot( a_cafe , b_cafe , col=rangi2 ,
      xlab="intercepts (a_cafe)" , ylab="slopes (b_cafe)" )

# overlay population distribution
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
  lines(ellipse(Sigma,centre=Mu,level=l),col=col.alpha("black",0.2))

## R code 13.10
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5  # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
mod.file13M2="mod13M2.Rdata"
if(file.exists(mod.file13M2)){load(mod.file13M2)}else{
  
  m_chapter <- map2stan(
    alist(
      wait ~ dnorm( mu , sigma ),
      mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
      c(a_cafe,b_cafe)[cafe] ~ dmvnorm2(c(a,b),sigma_cafe,Rho),
      a ~ dnorm(0,10),
      b ~ dnorm(0,10),
      sigma_cafe ~ dcauchy(0,2),
      sigma ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=d ,
    iter=5000 , warmup=2000 , chains=2 )
  
  m_new <- map2stan(
    alist(
      wait ~ dnorm( mu , sigma ),
      mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
      a_cafe[cafe] ~ dnorm(a,sigma_a),
      b_cafe[cafe] ~ dnorm(b,sigma_b),
      c(a,b) ~ dnorm(0,10),
      c(sigma_a,sigma_b) ~ dcauchy(0,2),
      
      sigma ~ dcauchy(0,2)
    ) ,
    data=d ,
    iter=5000 , warmup=2000 , chains=2 )
  save(m_chapter,m_new,file=mod.file13M2)
}
compare(m_chapter,m_new)
##            WAIC pWAIC dWAIC weight    SE  dSE
## m_chapter 301.7  33.1   0.0   0.71 16.81   NA
## m_new     303.5  32.8   1.8   0.29 16.94 2.44

The original is better, but not that much difference actually.

The new model does not explicitly determine the correlation between a and b. Inspect the correlations between a and b for each cafe:

post_new <- extract.samples(m_new)

correlations <- vector(length = 20)
for(i in 1:20){ correlations[i] <- cor(x=post_new$a_cafe[,1],y=post_new$b_cafe[,1]) }
correlations
##  [1] -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916
##  [8] -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916
## [15] -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916

This makes the correlation fixed, rather than having a distribution.

13M3

This is more or less the same problem as 13M2.

rm(list=ls())

## R code 13.17
library(rethinking)
data(UCBadmit)
d <- UCBadmit
d$male <- ifelse( d$applicant.gender=="male" , 1 , 0 )
d$dept_id <- coerce_index( d$dept )

## R code 13.19
mod.file13M3="mod13M3.Rdata"
if(file.exists(mod.file13M3)){load(mod.file13M3)}else{
  m_orig <- map2stan(
    alist(
      admit ~ dbinom( applications , p ),
      logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
      c(a_dept,bm_dept)[dept_id] ~ dmvnorm2( c(a,bm) , sigma_dept , Rho ),
      a ~ dnorm(0,10),
      bm ~ dnorm(0,1),
      sigma_dept ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
  
  m_new <- map2stan(
    alist(
      admit ~ dbinom( applications , p ),
      logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
      c(a_dept,bm_dept)[dept_id] ~ dmvnorm2( c(a,bm) , sigma_dept , Rho ),
      a ~ dnorm(0,10),
      bm ~ dnorm(0,1),
      sigma_dept ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
  save(m_orig,m_new,file=mod.file13M3)
}

Hard

13H1

rm(list=ls())
library(rethinking)

data(bangladesh)
d <- bangladesh

d$district_id <- as.integer(as.factor(d$district))

mod.file13H1="mod13H1.Rdata"
if(file.exists(mod.file13H1)){load(mod.file13H1)}else{
  mm <- map2stan(
    alist(
      use.contraception ~ dbinom(1, p),
      logit(p) <- a_district[district_id] + bU_district[district_id]*urban,
      c(a_district,bU_district)[district_id] ~ dmvnorm2( c(a,bU) , sigma_district , Rho ),
      a ~ dnorm(0,10),
      bU ~ dnorm(0,1),
      sigma_district ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
  save(mm, file=mod.file13H1)
}

Posterior distribution of Rho:

post <- extract.samples(mm)
dens(post$Rho[,1,2], main = "Rho")

Per district:

# extract the slope and intercept for each district.
pr <- precis(mm, depth = 2)
district_params <- data.frame(bU=pr@output[1:60,1]+pr@output[122,1],a=pr@output[61:120,1]+pr@output[121,1])
plot(district_params$a~district_params$bU)

par(mfrow=c(1,2))

library(dplyr)
d_sum <- d %>% as.tbl() %>% group_by(district_id,urban) %>% summarize(p=mean(use.contraception))

plot(y=d_sum$p, x=d_sum$urban, main="Data")
for(i in d_sum$district_id){
  y0=as.numeric(d_sum[d_sum$district_id==i & d_sum$urban==0,"p"])
  y1=as.numeric(d_sum[d_sum$district_id==i & d_sum$urban==1,"p"])
  segments(x0=0,x1=1,y0=y0,y1=y1,col="grey",lty=2)
}

y = link(mm)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
y.PI <- apply(y,2,PI)
preds=data.frame(district_id=d$district_id, urban=d$urban, y.mean=colMeans(y))

# summarize predictions per district
y_sum <- preds %>% as.tbl() %>% group_by(district_id,urban) %>% summarize(p=mean(y.mean))

plot(y=y_sum$p, x=y_sum$urban, main="Model Prediction")
for(i in y_sum$district_id){
  y0=as.numeric(y_sum[y_sum$district_id==i & y_sum$urban==0,"p"])
  y1=as.numeric(y_sum[y_sum$district_id==i & y_sum$urban==1,"p"])
  segments(x0=0,x1=1,y0=y0,y1=y1,col="steelblue",lty=2)
}

There are more of the higher slopes in the districts with low probability to begin with. Also, there are some negative slopes in the districts with high starting probability.

13H2

rm(list=ls())
data("Oxboys")

mod.file13H2="mod13H2.Rdata"
if(file.exists(mod.file13H2)){load(mod.file13H2)}else{
  mm <- map2stan(
    alist(
      height ~ dnorm(mu, sigma),
      mu <- a_boy[Subject] + bO_boy[Subject]*Occasion,
      c(a_boy,bO_boy)[Subject] ~ dmvnorm2( c(a,bO) , sigma_subject , Rho ),
      a ~ dnorm(0,10),
      bO ~ dnorm(0,1),
      c(sigma_subject,sigma) ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=Oxboys , warmup=2000 , iter=5000 , chains=4 , cores=3 )
  save(mm, file=mod.file13H2)
}
(pr <- precis(mm, depth = 2))
##                    Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## bO_boy[1]          1.79   0.08       1.65       1.92 12000    1
## bO_boy[2]          1.37   0.08       1.24       1.50 12000    1
## bO_boy[3]          1.21   0.08       1.08       1.34 12000    1
## bO_boy[4]          2.32   0.08       2.18       2.44 12000    1
## bO_boy[5]          1.57   0.08       1.43       1.69 12000    1
## bO_boy[6]          1.02   0.08       0.89       1.15 12000    1
## bO_boy[7]          1.27   0.08       1.14       1.41 12000    1
## bO_boy[8]          1.62   0.08       1.49       1.76 12000    1
## bO_boy[9]          1.51   0.08       1.39       1.64 12000    1
## bO_boy[10]         0.96   0.08       0.83       1.09 12000    1
## bO_boy[11]         2.11   0.08       1.98       2.24 12000    1
## bO_boy[12]         1.75   0.08       1.62       1.88 12000    1
## bO_boy[13]         2.09   0.08       1.96       2.22 12000    1
## bO_boy[14]         2.15   0.08       2.02       2.29 12000    1
## bO_boy[15]         1.78   0.08       1.65       1.91 12000    1
## bO_boy[16]         1.16   0.08       1.02       1.29 12000    1
## bO_boy[17]         2.14   0.08       2.02       2.28 12000    1
## bO_boy[18]         1.49   0.08       1.36       1.62 12000    1
## bO_boy[19]         2.25   0.08       2.12       2.38 12000    1
## bO_boy[20]         1.11   0.08       0.98       1.25 12000    1
## bO_boy[21]         1.87   0.08       1.74       2.01 12000    1
## bO_boy[22]         2.01   0.09       1.88       2.15 12000    1
## bO_boy[23]         1.79   0.08       1.67       1.93 12000    1
## bO_boy[24]         1.70   0.08       1.57       1.83 12000    1
## bO_boy[25]         1.03   0.08       0.90       1.17 12000    1
## bO_boy[26]         1.41   0.08       1.28       1.54 12000    1
## a_boy[1]         139.32   0.48     138.58     140.11 12000    1
## a_boy[2]         136.14   0.47     135.40     136.90 12000    1
## a_boy[3]         149.70   0.47     148.96     150.46 12000    1
## a_boy[4]         153.70   0.47     152.96     154.46 12000    1
## a_boy[5]         143.74   0.47     142.99     144.49 12000    1
## a_boy[6]         141.79   0.46     141.05     142.52 12000    1
## a_boy[7]         139.92   0.48     139.18     140.70 12000    1
## a_boy[8]         140.33   0.47     139.55     141.06 12000    1
## a_boy[9]         130.73   0.46     129.99     131.45 12000    1
## a_boy[10]        125.56   0.47     124.79     126.28 12000    1
## a_boy[11]        139.68   0.47     138.92     140.42 12000    1
## a_boy[12]        148.23   0.46     147.46     148.92 12000    1
## a_boy[13]        145.84   0.47     145.10     146.59 12000    1
## a_boy[14]        148.92   0.47     148.19     149.68 12000    1
## a_boy[15]        135.53   0.47     134.80     136.29 12000    1
## a_boy[16]        141.86   0.46     141.15     142.63 12000    1
## a_boy[17]        132.47   0.47     131.72     133.22 12000    1
## a_boy[18]        143.84   0.46     143.09     144.57 12000    1
## a_boy[19]        153.54   0.47     152.78     154.29 12000    1
## a_boy[20]        146.00   0.48     145.21     146.72 12000    1
## a_boy[21]        141.33   0.47     140.55     142.04 12000    1
## a_boy[22]        144.68   0.48     143.91     145.43 12000    1
## a_boy[23]        142.25   0.47     141.52     143.01 12000    1
## a_boy[24]        144.80   0.46     144.04     145.52 12000    1
## a_boy[25]        134.14   0.47     133.40     134.93 12000    1
## a_boy[26]        131.07   0.47     130.30     131.80 12000    1
## a                 23.35  11.36       5.33      41.49 12000    1
## bO                 1.24   0.48       0.49       1.89  2138    1
## sigma_subject[1] 118.59  20.74      85.58     148.81 12000    1
## sigma_subject[2]   0.67   0.33       0.32       1.07  1699    1
## sigma              0.66   0.03       0.60       0.71 12000    1
## Rho[1,1]           1.00   0.00       1.00       1.00 12000  NaN
## Rho[1,2]           0.44   0.43      -0.17       0.99  6045    1
## Rho[2,1]           0.44   0.43      -0.17       0.99  6045    1
## Rho[2,2]           1.00   0.00       1.00       1.00 12000    1

lowest slope around 1, highest around 2, so slope can matter ~14cm in this timespan.

The height difference is over 20 cm, so the initial height is more important in this timespan, but because the two are correlated (see below) it is difficult to say what is more important. Of course, the higher starting point was already caused by a faster growth rate in the past.

13H3

post <- extract.samples(mm)
dens(post$Rho[,1,2], main = "Rho")

Per boy:

# extract the slope and intercept for each district.

oxboy_params <- data.frame(bU=pr@output[1:26,1]+pr@output[54,1],a=pr@output[27:52,1]+pr@output[53,1])
plot(x=oxboy_params$a,y=oxboy_params$bU, xlab="starting height", ylab="growth speed")

Tall boys tend to also grow faster, but although this is not universally true, some boys level off, some have a period of fast growth.

13H2_v2

rm(list=ls())
data("Oxboys")

mod.file13H2_2="mod13H2_2.Rdata"
if(file.exists(mod.file13H2_2)){load(mod.file13H2_2)}else{
  mm <- map2stan(
    alist(
      height ~ dnorm(mu, sigma),
      mu <- a + a_boy[Subject] + (bO + bO_boy[Subject])*Occasion,
      c(a_boy,bO_boy)[Subject] ~ dmvnorm2( c(0,0) , sigma_subject , Rho ),
      a ~ dnorm(0,10),
      bO ~ dnorm(0,1),
      c(sigma_subject,sigma) ~ dcauchy(0,2),
      Rho ~ dlkjcorr(2)
    ) ,
    data=Oxboys , warmup=1000 , iter=5000 , chains=4 , cores=3 )
  save(mm,file=mod.file13H2_2)
}
(pr <- precis(mm, depth = 2))
##                    Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## bO_boy[1]          0.30   0.22      -0.01       0.61     7 1.20
## bO_boy[2]         -0.12   0.22      -0.44       0.17     7 1.21
## bO_boy[3]         -0.26   0.21      -0.58       0.02     9 1.17
## bO_boy[4]          0.84   0.22       0.52       1.12     9 1.18
## bO_boy[5]          0.09   0.22      -0.23       0.38     8 1.19
## bO_boy[6]         -0.46   0.22      -0.79      -0.18     8 1.18
## bO_boy[7]         -0.21   0.22      -0.53       0.08     8 1.19
## bO_boy[8]          0.14   0.22      -0.18       0.43     8 1.20
## bO_boy[9]          0.02   0.22      -0.32       0.30     7 1.22
## bO_boy[10]        -0.54   0.22      -0.88      -0.25     7 1.22
## bO_boy[11]         0.62   0.22       0.30       0.92     7 1.21
## bO_boy[12]         0.27   0.21      -0.04       0.57     8 1.18
## bO_boy[13]         0.61   0.22       0.31       0.91     8 1.19
## bO_boy[14]         0.68   0.22       0.37       0.97     8 1.19
## bO_boy[15]         0.29   0.22      -0.02       0.60     7 1.21
## bO_boy[16]        -0.32   0.22      -0.64      -0.04     8 1.19
## bO_boy[17]         0.64   0.22       0.32       0.94     7 1.22
## bO_boy[18]         0.02   0.22      -0.30       0.31     8 1.19
## bO_boy[19]         0.78   0.22       0.45       1.07     8 1.18
## bO_boy[20]        -0.36   0.21      -0.69      -0.08     9 1.18
## bO_boy[21]         0.39   0.22       0.07       0.68     8 1.20
## bO_boy[22]         0.53   0.22       0.20       0.81     8 1.19
## bO_boy[23]         0.31   0.22      -0.01       0.61     8 1.20
## bO_boy[24]         0.22   0.22      -0.10       0.51     8 1.19
## bO_boy[25]        -0.45   0.22      -0.78      -0.16     7 1.20
## bO_boy[26]        -0.08   0.22      -0.40       0.21     7 1.21
## a_boy[1]          29.88  49.28      -2.86     116.73     2 8.60
## a_boy[2]          26.69  49.28      -6.16     113.61     2 8.60
## a_boy[3]          40.16  49.34       7.47     127.22     2 8.60
## a_boy[4]          44.18  49.33      11.61     131.36     2 8.60
## a_boy[5]          34.26  49.31       1.66     121.30     2 8.61
## a_boy[6]          32.30  49.31      -0.51     119.29     2 8.60
## a_boy[7]          30.45  49.30      -2.66     117.00     2 8.60
## a_boy[8]          30.87  49.29      -2.00     117.67     2 8.60
## a_boy[9]          21.33  49.26     -11.50     108.14     2 8.59
## a_boy[10]         16.18  49.25     -16.50     103.00     2 8.60
## a_boy[11]         30.25  49.27      -2.45     117.10     2 8.59
## a_boy[12]         38.71  49.32       6.03     125.83     2 8.61
## a_boy[13]         36.36  49.30       3.70     123.38     2 8.60
## a_boy[14]         39.41  49.31       6.65     126.39     2 8.60
## a_boy[15]         26.11  49.27      -6.67     112.96     2 8.60
## a_boy[16]         32.37  49.31      -0.63     119.08     2 8.60
## a_boy[17]         23.09  49.25      -9.67     109.82     2 8.60
## a_boy[18]         34.34  49.31       1.37     121.05     2 8.60
## a_boy[19]         44.02  49.33      11.31     131.11     2 8.59
## a_boy[20]         36.48  49.33       3.87     123.61     2 8.60
## a_boy[21]         31.87  49.29      -0.79     118.82     2 8.60
## a_boy[22]         35.20  49.30       2.48     122.14     2 8.60
## a_boy[23]         32.78  49.30      -0.01     119.59     2 8.60
## a_boy[24]         35.31  49.31       2.85     122.58     2 8.60
## a_boy[25]         24.68  49.28      -8.10     111.52     2 8.60
## a_boy[26]         21.67  49.26     -11.05     108.44     2 8.60
## a                109.46  49.29      22.58     142.10     2 8.62
## bO                 1.48   0.20       1.22       1.76     7 1.24
## sigma_subject[1]  35.30  48.30       5.17     118.50     2 5.00
## sigma_subject[2]   0.49   0.12       0.33       0.63     8 1.18
## sigma              0.66   0.03       0.60       0.71  3846 1.00
## Rho[1,1]           1.00   0.00       1.00       1.00 16000  NaN
## Rho[1,2]           0.42   0.24       0.06       0.80   158 1.04
## Rho[2,1]           0.42   0.24       0.06       0.80   158 1.04
## Rho[2,2]           1.00   0.00       1.00       1.00 16000 1.00
post <- extract.samples(mm)
dens(post$Rho[,1,2], main = "Rho")

# extract the slope and intercept for each district.
oxboy_params <- data.frame(bU=pr@output[1:26,1]+pr@output[54,1],a=pr@output[27:52,1]+pr@output[53,1])
plot(x=oxboy_params$a,y=oxboy_params$bU, xlab="starting height", ylab="growth speed")

13H3

library(MASS)
# ?mvrnorm

# get the average for a and b0 from the posterior predictions
bO_total <- apply(post$bO_boy,2,function(bO_subject){bO_subject+post$bO})
dens(bO_total)

a_total <- apply(post$a_boy,2,function(a_subject){a_subject+post$a})
dens(a_total)

Definitely not normally distributed, so mean and sd don’t make much sense.

mu<-c(mean(a_total),mean(bO_total))

sa = sd(a_total)
sb = sd(bO_total)
r = mean(post$Rho[,1,2])

Sigma = matrix(c(sa^2,sa*sb*r,sa*sb*r,sb^2),nrow=2)

newboys <- mvrnorm(100, mu, Sigma)
colnames(newboys)<-c("a","b")
plot(x=newboys[,"a"],y=newboys[,"b"])
points(x=oxboy_params$a,y=oxboy_params$bU, pch=2, col="blue")

Not far off the real data.

How do I incorporate the uncertainty in a and b0?

vectorize the above:

mu=data.frame(a=rowMeans(a_total),b=rowMeans(bO_total))
sa = apply(a_total,1,sd)
sb = apply(bO_total,1,sd)
r = post$Rho[,1,2]

SigmaArray <- array(rep(NA,2*2*length(sa)),dim=c(2,2,length(sa)))
for(i in 1:length(sa)){
  SigmaArray[,,i]<-matrix(c(sa[i]^2,sa[i]*sb[i]*r[i],sa[i]*sb[i]*r[i],sb[i]^2),nrow=2)
}

# this doesn't work
# newboys2 <- mvrnorm(100, mu, SigmaArray)

This doesn’t work in vectorized format. Could draw each sample from a randomly selected set of parameters:

Then it doesn’t make too much sense to draw only 10 samples.

newboys2 <- t(replicate(10000, {
  idx <- sample(1:length(sa),1)
  mvrnorm(1, as.numeric(mu[idx,]), as.matrix(SigmaArray[,,idx]))
}))
# head(newboys2)
colnames(newboys2) <- c("a","b")
plot(x=newboys[,"a"],y=newboys[,"b"])
points(x=oxboy_params$a,y=oxboy_params$bU, pch=2, col="blue")

points(x=newboys2[,"a"],y=newboys2[,"b"], pch=".", col="red")

Not sure if this is better. Note that I’m still using the average boy height now.