alist(y~dnorm(mu,sigma),
mu= a[group] + b[group]*x,
c(a,b)[group] ~ dmvnorm2(c(a,b), sigma_group, Rho),
c(a,b) ~ dnorm(0,10),
sigma_group ~ dcauchy(0,2)
Rho~dlkjcorr(2),
sigma ~ dchauchy(0,2)
)
Rich people tend to have higher income. If you would predict savings in a few years based on savings now, people with more savings at baseline would probably also have a steeper increase in savings, due to compound interest etc.
Simulate the data like in the book:
a = 3.5 # average morning wait time
b = -1 # average difference afternoon wait time
sigma_a = 1 # std dev in intercepts
sigma_b = 0.5 # std dev in slopes
rho <- (0) # correlation between intercepts and slopes is now zero
## R code 13.2
Mu <- c( a , b )
## R code 13.3
cov_ab <- sigma_a*sigma_b*rho
Sigma <- matrix( c(sigma_a^2,cov_ab,cov_ab,sigma_b^2) , ncol=2 )
## R code 13.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix
# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)
## R code 13.6
N_cafes <- 20
## R code 13.7
library(MASS)
set.seed(10) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )
## R code 13.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]
## R code 13.9
plot( a_cafe , b_cafe , col=rangi2 ,
xlab="intercepts (a_cafe)" , ylab="slopes (b_cafe)" )
# overlay population distribution
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
lines(ellipse(Sigma,centre=Mu,level=l),col=col.alpha("black",0.2))
## R code 13.10
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
Fit the model and inspect posterior distribution for correlation:
mod.file13.1="mod13_1.Rdata"
if(file.exists(mod.file13.1)){load(mod.file13.1)}else{
m13.1 <- map2stan(
alist(
wait ~ dnorm( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
c(a_cafe,b_cafe)[cafe] ~ dmvnorm2(c(a,b),sigma_cafe,Rho),
a ~ dnorm(0,10),
b ~ dnorm(0,10),
sigma_cafe ~ dcauchy(0,2),
sigma ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=d ,
iter=5000 , warmup=2000 , chains=2 )
save(m13.1, file=mod.file13.1)
}
## R code 13.13
post <- extract.samples(m13.1)
dens( post$Rho[,1,2] )
The predicted Rho is not centered around 0. It is also quite sensitive to a different seed (not shown).
Increase the amount of data to see if this matters:
## R code 13.10
N_visits <- 100
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
mod.file13.1.2="mod13_1_2.Rdata"
if(file.exists(mod.file13.1.2)){load(mod.file13.1.2)}else{
m13.1 <- map2stan(
alist(
wait ~ dnorm( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
c(a_cafe,b_cafe)[cafe] ~ dmvnorm2(c(a,b),sigma_cafe,Rho),
a ~ dnorm(0,10),
b ~ dnorm(0,10),
sigma_cafe ~ dcauchy(0,2),
sigma ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=d ,
iter=4000 , warmup=1000 , chains=3, cores=3)
save(m13.1,file=mod.file13.1.2)
}
## R code 13.13
post <- extract.samples(m13.1)
dens( post$Rho[,1,2] )
Now it is much more accurate.
Simulate the original data again
a = 3.5 # average morning wait time
b = -1 # average difference afternoon wait time
sigma_a = 1 # std dev in intercepts
sigma_b = 0.5 # std dev in slopes
rho = -0.7 # correlation between intercepts and slopes
## R code 13.2
Mu <- c( a , b )
## R code 13.3
cov_ab <- sigma_a*sigma_b*rho
Sigma <- matrix( c(sigma_a^2,cov_ab,cov_ab,sigma_b^2) , ncol=2 )
## R code 13.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix
# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)
## R code 13.6
N_cafes <- 20
## R code 13.7
library(MASS)
set.seed(10) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )
## R code 13.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]
## R code 13.9
plot( a_cafe , b_cafe , col=rangi2 ,
xlab="intercepts (a_cafe)" , ylab="slopes (b_cafe)" )
# overlay population distribution
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
lines(ellipse(Sigma,centre=Mu,level=l),col=col.alpha("black",0.2))
## R code 13.10
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
mod.file13M2="mod13M2.Rdata"
if(file.exists(mod.file13M2)){load(mod.file13M2)}else{
m_chapter <- map2stan(
alist(
wait ~ dnorm( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
c(a_cafe,b_cafe)[cafe] ~ dmvnorm2(c(a,b),sigma_cafe,Rho),
a ~ dnorm(0,10),
b ~ dnorm(0,10),
sigma_cafe ~ dcauchy(0,2),
sigma ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=d ,
iter=5000 , warmup=2000 , chains=2 )
m_new <- map2stan(
alist(
wait ~ dnorm( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
a_cafe[cafe] ~ dnorm(a,sigma_a),
b_cafe[cafe] ~ dnorm(b,sigma_b),
c(a,b) ~ dnorm(0,10),
c(sigma_a,sigma_b) ~ dcauchy(0,2),
sigma ~ dcauchy(0,2)
) ,
data=d ,
iter=5000 , warmup=2000 , chains=2 )
save(m_chapter,m_new,file=mod.file13M2)
}
compare(m_chapter,m_new)
## WAIC pWAIC dWAIC weight SE dSE
## m_chapter 301.7 33.1 0.0 0.71 16.81 NA
## m_new 303.5 32.8 1.8 0.29 16.94 2.44
The original is better, but not that much difference actually.
The new model does not explicitly determine the correlation between a and b. Inspect the correlations between a and b for each cafe:
post_new <- extract.samples(m_new)
correlations <- vector(length = 20)
for(i in 1:20){ correlations[i] <- cor(x=post_new$a_cafe[,1],y=post_new$b_cafe[,1]) }
correlations
## [1] -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916
## [8] -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916
## [15] -0.658916 -0.658916 -0.658916 -0.658916 -0.658916 -0.658916
This makes the correlation fixed, rather than having a distribution.
This is more or less the same problem as 13M2.
rm(list=ls())
## R code 13.17
library(rethinking)
data(UCBadmit)
d <- UCBadmit
d$male <- ifelse( d$applicant.gender=="male" , 1 , 0 )
d$dept_id <- coerce_index( d$dept )
## R code 13.19
mod.file13M3="mod13M3.Rdata"
if(file.exists(mod.file13M3)){load(mod.file13M3)}else{
m_orig <- map2stan(
alist(
admit ~ dbinom( applications , p ),
logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
c(a_dept,bm_dept)[dept_id] ~ dmvnorm2( c(a,bm) , sigma_dept , Rho ),
a ~ dnorm(0,10),
bm ~ dnorm(0,1),
sigma_dept ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
m_new <- map2stan(
alist(
admit ~ dbinom( applications , p ),
logit(p) <- a_dept[dept_id] + bm_dept[dept_id]*male,
c(a_dept,bm_dept)[dept_id] ~ dmvnorm2( c(a,bm) , sigma_dept , Rho ),
a ~ dnorm(0,10),
bm ~ dnorm(0,1),
sigma_dept ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
save(m_orig,m_new,file=mod.file13M3)
}
rm(list=ls())
library(rethinking)
data(bangladesh)
d <- bangladesh
d$district_id <- as.integer(as.factor(d$district))
mod.file13H1="mod13H1.Rdata"
if(file.exists(mod.file13H1)){load(mod.file13H1)}else{
mm <- map2stan(
alist(
use.contraception ~ dbinom(1, p),
logit(p) <- a_district[district_id] + bU_district[district_id]*urban,
c(a_district,bU_district)[district_id] ~ dmvnorm2( c(a,bU) , sigma_district , Rho ),
a ~ dnorm(0,10),
bU ~ dnorm(0,1),
sigma_district ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=d , warmup=1000 , iter=5000 , chains=4 , cores=3 )
save(mm, file=mod.file13H1)
}
Posterior distribution of Rho:
post <- extract.samples(mm)
dens(post$Rho[,1,2], main = "Rho")
Per district:
# extract the slope and intercept for each district.
pr <- precis(mm, depth = 2)
district_params <- data.frame(bU=pr@output[1:60,1]+pr@output[122,1],a=pr@output[61:120,1]+pr@output[121,1])
plot(district_params$a~district_params$bU)
par(mfrow=c(1,2))
library(dplyr)
d_sum <- d %>% as.tbl() %>% group_by(district_id,urban) %>% summarize(p=mean(use.contraception))
plot(y=d_sum$p, x=d_sum$urban, main="Data")
for(i in d_sum$district_id){
y0=as.numeric(d_sum[d_sum$district_id==i & d_sum$urban==0,"p"])
y1=as.numeric(d_sum[d_sum$district_id==i & d_sum$urban==1,"p"])
segments(x0=0,x1=1,y0=y0,y1=y1,col="grey",lty=2)
}
y = link(mm)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
y.PI <- apply(y,2,PI)
preds=data.frame(district_id=d$district_id, urban=d$urban, y.mean=colMeans(y))
# summarize predictions per district
y_sum <- preds %>% as.tbl() %>% group_by(district_id,urban) %>% summarize(p=mean(y.mean))
plot(y=y_sum$p, x=y_sum$urban, main="Model Prediction")
for(i in y_sum$district_id){
y0=as.numeric(y_sum[y_sum$district_id==i & y_sum$urban==0,"p"])
y1=as.numeric(y_sum[y_sum$district_id==i & y_sum$urban==1,"p"])
segments(x0=0,x1=1,y0=y0,y1=y1,col="steelblue",lty=2)
}
There are more of the higher slopes in the districts with low probability to begin with. Also, there are some negative slopes in the districts with high starting probability.
rm(list=ls())
data("Oxboys")
mod.file13H2="mod13H2.Rdata"
if(file.exists(mod.file13H2)){load(mod.file13H2)}else{
mm <- map2stan(
alist(
height ~ dnorm(mu, sigma),
mu <- a_boy[Subject] + bO_boy[Subject]*Occasion,
c(a_boy,bO_boy)[Subject] ~ dmvnorm2( c(a,bO) , sigma_subject , Rho ),
a ~ dnorm(0,10),
bO ~ dnorm(0,1),
c(sigma_subject,sigma) ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=Oxboys , warmup=2000 , iter=5000 , chains=4 , cores=3 )
save(mm, file=mod.file13H2)
}
(pr <- precis(mm, depth = 2))
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## bO_boy[1] 1.79 0.08 1.65 1.92 12000 1
## bO_boy[2] 1.37 0.08 1.24 1.50 12000 1
## bO_boy[3] 1.21 0.08 1.08 1.34 12000 1
## bO_boy[4] 2.32 0.08 2.18 2.44 12000 1
## bO_boy[5] 1.57 0.08 1.43 1.69 12000 1
## bO_boy[6] 1.02 0.08 0.89 1.15 12000 1
## bO_boy[7] 1.27 0.08 1.14 1.41 12000 1
## bO_boy[8] 1.62 0.08 1.49 1.76 12000 1
## bO_boy[9] 1.51 0.08 1.39 1.64 12000 1
## bO_boy[10] 0.96 0.08 0.83 1.09 12000 1
## bO_boy[11] 2.11 0.08 1.98 2.24 12000 1
## bO_boy[12] 1.75 0.08 1.62 1.88 12000 1
## bO_boy[13] 2.09 0.08 1.96 2.22 12000 1
## bO_boy[14] 2.15 0.08 2.02 2.29 12000 1
## bO_boy[15] 1.78 0.08 1.65 1.91 12000 1
## bO_boy[16] 1.16 0.08 1.02 1.29 12000 1
## bO_boy[17] 2.14 0.08 2.02 2.28 12000 1
## bO_boy[18] 1.49 0.08 1.36 1.62 12000 1
## bO_boy[19] 2.25 0.08 2.12 2.38 12000 1
## bO_boy[20] 1.11 0.08 0.98 1.25 12000 1
## bO_boy[21] 1.87 0.08 1.74 2.01 12000 1
## bO_boy[22] 2.01 0.09 1.88 2.15 12000 1
## bO_boy[23] 1.79 0.08 1.67 1.93 12000 1
## bO_boy[24] 1.70 0.08 1.57 1.83 12000 1
## bO_boy[25] 1.03 0.08 0.90 1.17 12000 1
## bO_boy[26] 1.41 0.08 1.28 1.54 12000 1
## a_boy[1] 139.32 0.48 138.58 140.11 12000 1
## a_boy[2] 136.14 0.47 135.40 136.90 12000 1
## a_boy[3] 149.70 0.47 148.96 150.46 12000 1
## a_boy[4] 153.70 0.47 152.96 154.46 12000 1
## a_boy[5] 143.74 0.47 142.99 144.49 12000 1
## a_boy[6] 141.79 0.46 141.05 142.52 12000 1
## a_boy[7] 139.92 0.48 139.18 140.70 12000 1
## a_boy[8] 140.33 0.47 139.55 141.06 12000 1
## a_boy[9] 130.73 0.46 129.99 131.45 12000 1
## a_boy[10] 125.56 0.47 124.79 126.28 12000 1
## a_boy[11] 139.68 0.47 138.92 140.42 12000 1
## a_boy[12] 148.23 0.46 147.46 148.92 12000 1
## a_boy[13] 145.84 0.47 145.10 146.59 12000 1
## a_boy[14] 148.92 0.47 148.19 149.68 12000 1
## a_boy[15] 135.53 0.47 134.80 136.29 12000 1
## a_boy[16] 141.86 0.46 141.15 142.63 12000 1
## a_boy[17] 132.47 0.47 131.72 133.22 12000 1
## a_boy[18] 143.84 0.46 143.09 144.57 12000 1
## a_boy[19] 153.54 0.47 152.78 154.29 12000 1
## a_boy[20] 146.00 0.48 145.21 146.72 12000 1
## a_boy[21] 141.33 0.47 140.55 142.04 12000 1
## a_boy[22] 144.68 0.48 143.91 145.43 12000 1
## a_boy[23] 142.25 0.47 141.52 143.01 12000 1
## a_boy[24] 144.80 0.46 144.04 145.52 12000 1
## a_boy[25] 134.14 0.47 133.40 134.93 12000 1
## a_boy[26] 131.07 0.47 130.30 131.80 12000 1
## a 23.35 11.36 5.33 41.49 12000 1
## bO 1.24 0.48 0.49 1.89 2138 1
## sigma_subject[1] 118.59 20.74 85.58 148.81 12000 1
## sigma_subject[2] 0.67 0.33 0.32 1.07 1699 1
## sigma 0.66 0.03 0.60 0.71 12000 1
## Rho[1,1] 1.00 0.00 1.00 1.00 12000 NaN
## Rho[1,2] 0.44 0.43 -0.17 0.99 6045 1
## Rho[2,1] 0.44 0.43 -0.17 0.99 6045 1
## Rho[2,2] 1.00 0.00 1.00 1.00 12000 1
lowest slope around 1, highest around 2, so slope can matter ~14cm in this timespan.
The height difference is over 20 cm, so the initial height is more important in this timespan, but because the two are correlated (see below) it is difficult to say what is more important. Of course, the higher starting point was already caused by a faster growth rate in the past.
post <- extract.samples(mm)
dens(post$Rho[,1,2], main = "Rho")
Per boy:
# extract the slope and intercept for each district.
oxboy_params <- data.frame(bU=pr@output[1:26,1]+pr@output[54,1],a=pr@output[27:52,1]+pr@output[53,1])
plot(x=oxboy_params$a,y=oxboy_params$bU, xlab="starting height", ylab="growth speed")
Tall boys tend to also grow faster, but although this is not universally true, some boys level off, some have a period of fast growth.
rm(list=ls())
data("Oxboys")
mod.file13H2_2="mod13H2_2.Rdata"
if(file.exists(mod.file13H2_2)){load(mod.file13H2_2)}else{
mm <- map2stan(
alist(
height ~ dnorm(mu, sigma),
mu <- a + a_boy[Subject] + (bO + bO_boy[Subject])*Occasion,
c(a_boy,bO_boy)[Subject] ~ dmvnorm2( c(0,0) , sigma_subject , Rho ),
a ~ dnorm(0,10),
bO ~ dnorm(0,1),
c(sigma_subject,sigma) ~ dcauchy(0,2),
Rho ~ dlkjcorr(2)
) ,
data=Oxboys , warmup=1000 , iter=5000 , chains=4 , cores=3 )
save(mm,file=mod.file13H2_2)
}
(pr <- precis(mm, depth = 2))
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## bO_boy[1] 0.30 0.22 -0.01 0.61 7 1.20
## bO_boy[2] -0.12 0.22 -0.44 0.17 7 1.21
## bO_boy[3] -0.26 0.21 -0.58 0.02 9 1.17
## bO_boy[4] 0.84 0.22 0.52 1.12 9 1.18
## bO_boy[5] 0.09 0.22 -0.23 0.38 8 1.19
## bO_boy[6] -0.46 0.22 -0.79 -0.18 8 1.18
## bO_boy[7] -0.21 0.22 -0.53 0.08 8 1.19
## bO_boy[8] 0.14 0.22 -0.18 0.43 8 1.20
## bO_boy[9] 0.02 0.22 -0.32 0.30 7 1.22
## bO_boy[10] -0.54 0.22 -0.88 -0.25 7 1.22
## bO_boy[11] 0.62 0.22 0.30 0.92 7 1.21
## bO_boy[12] 0.27 0.21 -0.04 0.57 8 1.18
## bO_boy[13] 0.61 0.22 0.31 0.91 8 1.19
## bO_boy[14] 0.68 0.22 0.37 0.97 8 1.19
## bO_boy[15] 0.29 0.22 -0.02 0.60 7 1.21
## bO_boy[16] -0.32 0.22 -0.64 -0.04 8 1.19
## bO_boy[17] 0.64 0.22 0.32 0.94 7 1.22
## bO_boy[18] 0.02 0.22 -0.30 0.31 8 1.19
## bO_boy[19] 0.78 0.22 0.45 1.07 8 1.18
## bO_boy[20] -0.36 0.21 -0.69 -0.08 9 1.18
## bO_boy[21] 0.39 0.22 0.07 0.68 8 1.20
## bO_boy[22] 0.53 0.22 0.20 0.81 8 1.19
## bO_boy[23] 0.31 0.22 -0.01 0.61 8 1.20
## bO_boy[24] 0.22 0.22 -0.10 0.51 8 1.19
## bO_boy[25] -0.45 0.22 -0.78 -0.16 7 1.20
## bO_boy[26] -0.08 0.22 -0.40 0.21 7 1.21
## a_boy[1] 29.88 49.28 -2.86 116.73 2 8.60
## a_boy[2] 26.69 49.28 -6.16 113.61 2 8.60
## a_boy[3] 40.16 49.34 7.47 127.22 2 8.60
## a_boy[4] 44.18 49.33 11.61 131.36 2 8.60
## a_boy[5] 34.26 49.31 1.66 121.30 2 8.61
## a_boy[6] 32.30 49.31 -0.51 119.29 2 8.60
## a_boy[7] 30.45 49.30 -2.66 117.00 2 8.60
## a_boy[8] 30.87 49.29 -2.00 117.67 2 8.60
## a_boy[9] 21.33 49.26 -11.50 108.14 2 8.59
## a_boy[10] 16.18 49.25 -16.50 103.00 2 8.60
## a_boy[11] 30.25 49.27 -2.45 117.10 2 8.59
## a_boy[12] 38.71 49.32 6.03 125.83 2 8.61
## a_boy[13] 36.36 49.30 3.70 123.38 2 8.60
## a_boy[14] 39.41 49.31 6.65 126.39 2 8.60
## a_boy[15] 26.11 49.27 -6.67 112.96 2 8.60
## a_boy[16] 32.37 49.31 -0.63 119.08 2 8.60
## a_boy[17] 23.09 49.25 -9.67 109.82 2 8.60
## a_boy[18] 34.34 49.31 1.37 121.05 2 8.60
## a_boy[19] 44.02 49.33 11.31 131.11 2 8.59
## a_boy[20] 36.48 49.33 3.87 123.61 2 8.60
## a_boy[21] 31.87 49.29 -0.79 118.82 2 8.60
## a_boy[22] 35.20 49.30 2.48 122.14 2 8.60
## a_boy[23] 32.78 49.30 -0.01 119.59 2 8.60
## a_boy[24] 35.31 49.31 2.85 122.58 2 8.60
## a_boy[25] 24.68 49.28 -8.10 111.52 2 8.60
## a_boy[26] 21.67 49.26 -11.05 108.44 2 8.60
## a 109.46 49.29 22.58 142.10 2 8.62
## bO 1.48 0.20 1.22 1.76 7 1.24
## sigma_subject[1] 35.30 48.30 5.17 118.50 2 5.00
## sigma_subject[2] 0.49 0.12 0.33 0.63 8 1.18
## sigma 0.66 0.03 0.60 0.71 3846 1.00
## Rho[1,1] 1.00 0.00 1.00 1.00 16000 NaN
## Rho[1,2] 0.42 0.24 0.06 0.80 158 1.04
## Rho[2,1] 0.42 0.24 0.06 0.80 158 1.04
## Rho[2,2] 1.00 0.00 1.00 1.00 16000 1.00
post <- extract.samples(mm)
dens(post$Rho[,1,2], main = "Rho")
# extract the slope and intercept for each district.
oxboy_params <- data.frame(bU=pr@output[1:26,1]+pr@output[54,1],a=pr@output[27:52,1]+pr@output[53,1])
plot(x=oxboy_params$a,y=oxboy_params$bU, xlab="starting height", ylab="growth speed")
library(MASS)
# ?mvrnorm
# get the average for a and b0 from the posterior predictions
bO_total <- apply(post$bO_boy,2,function(bO_subject){bO_subject+post$bO})
dens(bO_total)
a_total <- apply(post$a_boy,2,function(a_subject){a_subject+post$a})
dens(a_total)
Definitely not normally distributed, so mean and sd don’t make much sense.
mu<-c(mean(a_total),mean(bO_total))
sa = sd(a_total)
sb = sd(bO_total)
r = mean(post$Rho[,1,2])
Sigma = matrix(c(sa^2,sa*sb*r,sa*sb*r,sb^2),nrow=2)
newboys <- mvrnorm(100, mu, Sigma)
colnames(newboys)<-c("a","b")
plot(x=newboys[,"a"],y=newboys[,"b"])
points(x=oxboy_params$a,y=oxboy_params$bU, pch=2, col="blue")
Not far off the real data.
How do I incorporate the uncertainty in a and b0?
vectorize the above:
mu=data.frame(a=rowMeans(a_total),b=rowMeans(bO_total))
sa = apply(a_total,1,sd)
sb = apply(bO_total,1,sd)
r = post$Rho[,1,2]
SigmaArray <- array(rep(NA,2*2*length(sa)),dim=c(2,2,length(sa)))
for(i in 1:length(sa)){
SigmaArray[,,i]<-matrix(c(sa[i]^2,sa[i]*sb[i]*r[i],sa[i]*sb[i]*r[i],sb[i]^2),nrow=2)
}
# this doesn't work
# newboys2 <- mvrnorm(100, mu, SigmaArray)
This doesn’t work in vectorized format. Could draw each sample from a randomly selected set of parameters:
Then it doesn’t make too much sense to draw only 10 samples.
newboys2 <- t(replicate(10000, {
idx <- sample(1:length(sa),1)
mvrnorm(1, as.numeric(mu[idx,]), as.matrix(SigmaArray[,,idx]))
}))
# head(newboys2)
colnames(newboys2) <- c("a","b")
plot(x=newboys[,"a"],y=newboys[,"b"])
points(x=oxboy_params$a,y=oxboy_params$bU, pch=2, col="blue")
points(x=newboys2[,"a"],y=newboys2[,"b"], pch=".", col="red")
Not sure if this is better. Note that I’m still using the average boy height now.