This chapter extended the basic multilevel strategy of partial pooling to slopes as well as intercepts. Accomplishing this meant modeling covariation in the statistical population of parameters. The LKJcorr prior was introduced as a convenient family of priors for correlation matrices. You saw how covariance models can be applied to causal inference, using instrumental variables and the front-door criterion. Gaussian processes represent a practical method of extending the varying effects strategy to continuous dimensions of similarity, such as spatial, network, phylogenetic, or any other abstract distance between entities in the data.
Place each answer inside the code chunk (grey box). The code chunks should contain a text response or a code that completes/answers the question or activity requested. Make sure to include plots if the question requests them.
Finally, upon completion, name your final output .html
file as: YourName_ANLY505-Year-Semester.html and publish
the assignment to your R Pubs account and submit the link to Canvas.
Each question is worth 5 points.
14-1. Repeat the café robot simulation from the beginning of the chapter. This time, set rho to zero, so that there is no correlation between intercepts and slopes. Plot the posterior distribution. How does the posterior distribution of the correlation reflect this change in the underlying simulation?
a = 3.5
b = (-1)
sigma_a = 1
sigma_b = 0.5
rho= 0
Mu = c(a, b)
cov_ab = sigma_a * sigma_b * rho
Sigma = matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol = 2)
N_cafes = 20
set.seed(6)
vary_effects = mvrnorm(N_cafes, Mu, Sigma)
a_cafe = vary_effects[, 1]
b_cafe = vary_effects[, 2]
N_visits = 10
afternoon = rep(0:1, N_visits * N_cafes / 2)
cafe_id = rep(1:N_cafes, each = N_visits)
mu = a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma = 0.5 # std dev within cafes
wait = rnorm(N_visits * N_cafes, mu, sigma)
d = data.frame(cafe = cafe_id, afternoon = afternoon, wait = wait)
m_M1 = ulam(
alist(
wait ~ normal(mu, sigma),
mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
c(a_cafe, b_cafe)[cafe] ~ multi_normal(c(a, b), Rho, sigma_cafe),
a ~ normal(5, 2),
b ~ normal(-1, 0.5),
sigma_cafe ~ exponential(1),
sigma ~ exponential(1),
Rho ~ lkj_corr(2)
),
data = d, chains = 6, cores = 6
)
post = extract.samples(m_M1)
ggplot() +
stat_halfeye(aes(x = post$Rho[, 1, 2])) +
theme_bw() +
labs(x = "Rho")
precis(m_M1, pars = "Rho[1,2]")
## result
## mean 0.01276044
## sd 0.23550359
## 5.5% -0.37040035
## 94.5% 0.39759593
## n_eff 2509.46205075
## Rhat 1.00032502
14-2. Fit this multilevel model to the simulated café data: \[\begin{align} \ W_i ∼ Normal(μ_i, σ) \\ \ μ_i = α_{CAFE[i]} + β_{CAFE[i]}A_i \\ \ α_{CAFE} ∼ Normal(α, σ_α) \\ \ β_{CAFE} ∼ Normal(β, σ_β) \\ \ α ∼ Normal(0, 10) \\ \ β ∼ Normal(0, 10) \\ \ σ, σ_α, σ_β ∼ Exponential(1) \\ \end{align}\] Use WAIC to compare this model to the model from the chapter, the one that uses a multi-variate Gaussian prior. Visualize the differences between the posterior means. Explain the result.
a = 3.5
b = -1
sigma_a = 1
sigma_b = 0.5
rho = -0.7
Mu = c(a, b)
cov_ab = sigma_a * sigma_b * rho
Sigma = matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol = 2)
N_cafes = 20
set.seed(42)
vary_effects = mvrnorm(N_cafes, Mu, Sigma)
a_cafe = vary_effects[, 1]
b_cafe = vary_effects[, 2]
N_visits = 10
afternoon = rep(0:1, N_visits * N_cafes / 2)
cafe_id = rep(1:N_cafes, each = N_visits)
mu = a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma = 0.5
wait = rnorm(N_visits * N_cafes, mu, sigma)
d = data.frame(cafe = cafe_id, afternoon = afternoon, wait = wait)
m_M2Baseline = ulam(
alist(
wait ~ normal(mu, sigma),
mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
c(a_cafe, b_cafe)[cafe] ~ multi_normal(c(a, b), Rho, sigma_cafe),
a ~ normal(5, 2),
b ~ normal(-1, 0.5),
sigma_cafe ~ exponential(1),
sigma ~ exponential(1),
Rho ~ lkj_corr(2)
),
data = d, chains = 4, cores = 4
)
m_M2 = ulam(
alist(
wait ~ dnorm(mu, sigma),
mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
a_cafe[cafe] ~ dnorm(a, sigma_alpha),
b_cafe[cafe] ~ dnorm(b, sigma_beta),
a ~ dnorm(0, 10),
b ~ dnorm(0, 10),
sigma ~ dexp(1),
sigma_alpha ~ dexp(1),
sigma_beta ~ dexp(1)
),
data = d, chains = 4, cores = 4
)
post_Base = extract.samples(m_M2Baseline)
a_Base = apply(post_Base$a_cafe, 2, mean)
b_Base = apply(post_Base$b_cafe, 2, mean)
post_M2 = extract.samples(m_M2)
a_M2 = apply(post_M2$a_cafe, 2, mean)
b_M2 = apply(post_M2$b_cafe, 2, mean)
plot(a_M2, b_M2,
xlab = "intercept", ylab = "slope",
pch = 16, col = rangi2, ylim = c(min(b_M2) - 0.05, max(b_M2) + 0.05),
xlim = c(min(a_M2) - 0.1, max(a_M2) + 0.1), cex = 2
)
points(a_Base, b_Base, pch = 1, cex = 2)
## This model takes into consideration the correlation between slopes
and intercepts. As we can see, slopes and intercepts have a inherent
relationship and it reveals a negative correlation between them. Large
intercepts are connected with slopes that are strongly negative.
14-3. Use WAIC to compare the Gaussian process model of Oceanic tools to the models fit to the same data in Chapter 11. Pay special attention to the effective numbers of parameters, as estimated by WAIC. Explain the result.
data(Kline)
d <- Kline
# Chapter 11 stuff
d$P <- scale(log(d$population))
d$contact_id <- ifelse(d$contact == "high", 2, 1)
# Chapter 14 stuff
d$society <- 1:10
data(islandsDistMatrix)
dat2 <- list(T = d$total_tools, P = d$population, cid = d$contact_id)
m11.11 <- ulam(
alist(
T ~ dpois(lambda),
lambda <- exp(a[cid]) * P^b[cid] / g,
a[cid] ~ dnorm(1, 1),
b[cid] ~ dexp(1),
g ~ dexp(1)
),
data = dat2, chains = 4, cores = 4, log_lik = TRUE
)
dat_list <- list(T = d$total_tools, P = d$population, society = d$society, Dmat = islandsDistMatrix)
m14.8 <- ulam(
alist(
T ~ dpois(lambda),
lambda <- (a * P^b / g) * exp(k[society]),
vector[10]:k ~ multi_normal(0, SIGMA),
matrix[10, 10]:SIGMA <- cov_GPL2(Dmat, etasq, rhosq, 0.01), c(a, b, g) ~ dexp(1), etasq ~ dexp(2), rhosq ~ dexp(0.5)
),
data = dat_list, chains = 4, cores = 4, iter = 2000, log_lik = TRUE
)
compare(m11.11, m14.8)
## WAIC SE dWAIC dSE pWAIC weight
## m14.8 67.83587 2.306448 0.00000 NA 4.180303 0.998449729
## m11.11 80.77141 11.376891 12.93555 11.5508 5.212246 0.001550271
14-4. Let’s revisit the Bangladesh fertility data, data(bangladesh), from the practice problems for Chapter 13. Fit a model with both varying intercepts by district_id and varying slopes of urban by district_id. You are still predicting use.contraception. Inspect the correlation between the intercepts and slopes. Can you interpret this correlation, in terms of what it tells you about the pattern of contraceptive use in the sample? Plot the mean (or median) varying effect estimates for both the intercepts and slopes, by district. Then you can visualize the correlation and maybe more easily think through what it means to have a particular correlation. Plot the predicted proportion of women using contraception, with urban women on one axis and rural on the other. Explain the results.
data(bangladesh)
d <- bangladesh
dat_list <- list(
C = d$use.contraception,
did = as.integer(as.factor(d$district)),
urban = d$urban
)
m_H1 <- ulam(
alist(
C ~ bernoulli(p),
logit(p) <- a[did] + b[did] * urban,
c(a, b)[did] ~ multi_normal(c(abar, bbar), Rho, Sigma),
abar ~ normal(0, 1),
bbar ~ normal(0, 0.5),
Rho ~ lkj_corr(2),
Sigma ~ exponential(1)
),
data = dat_list, chains = 4, cores = 4, iter = 4000
)
precis(m_H1)
## mean sd 5.5% 94.5% n_eff Rhat4
## abar -0.6862601 0.09986652 -0.8489704 -0.5295257 5317.029 0.9999925
## bbar 0.6370801 0.16063809 0.3792178 0.8938546 3998.276 1.0002928
precis(m_H1, depth = 3, pars = c("Rho", "Sigma"))
## mean sd 5.5% 94.5% n_eff Rhat4
## Rho[1,1] 1.0000000 0.000000e+00 1.0000000 1.0000000 NaN NaN
## Rho[1,2] -0.6504501 1.697663e-01 -0.8664618 -0.3470755 1376.404 1.0069766
## Rho[2,1] -0.6504501 1.697663e-01 -0.8664618 -0.3470755 1376.404 1.0069766
## Rho[2,2] 1.0000000 5.843557e-17 1.0000000 1.0000000 7519.952 0.9994999
## Sigma[1] 0.5766410 9.756377e-02 0.4304802 0.7424732 2044.418 1.0007327
## Sigma[2] 0.7767988 2.007929e-01 0.4725901 1.1108130 1061.800 1.0040047
post <- extract.samples(m_H1)
a <- apply(post$a, 2, mean)
b <- apply(post$b, 2, mean)
plot(a, b, xlab = "a (intercept)", ylab = "b (urban slope)")
abline(h = 0, lty = 2)
abline(v = 0, lty = 2)
R <- apply(post$Rho, 2:3, mean)
s <- apply(post$Sigma, 2, mean)
S <- diag(s) %*% R %*% diag(s)
ll <- c(0.5, 0.67, 0.89, 0.97)
for (l in ll) {
el <- ellipse(S, centre = c(mean(post$abar), mean(post$bbar)), level = l)
lines(el, col = "black", lwd = 0.5)
}
u0 <- inv_logit(a)
u1 <- inv_logit(a + b)
plot(u0, u1, xlim = c(0, 1), ylim = c(0, 1), xlab = "urban = 0", ylab = "urban = 1")
abline(h = 0.5, lty = 2)
abline(v = 0.5, lty = 2)
## We observe that in districts with a higher utilization of
contraception outside of urban areas, the slopes are smaller. There is
no substantial change in the use of contraceptives when transitioning to
urban areas.
14-5 Now consider the predictor variables age.centered and living.children, also contained in data(bangladesh). Suppose that age influences contraceptive use (changing attitudes) and number of children (older people have had more time to have kids). Number of children may also directly influence contraceptive use. Draw a DAG that reflects these hypothetical relationships. Then build models needed to evaluate the DAG. You will need at least two models. Retain district and urban, as in 14-4. What do you conclude about the causal influence of age and children?
dag <- dagitty::dagitty(" dag {
Age -> N_children
Age -> contraception
N_children -> contraception
}")
drawdag(dag)
dat_list$children <- standardize(d$living.children)
dat_list$age <- standardize(d$age.centered)
m1 <- ulam( alist(
C ~ bernoulli( p ),
logit(p) <- a[did] + b[did]*urban + bA*age,
c(a,b)[did] ~ multi_normal( c(abar,bbar), Rho, Sigma),
abar ~ normal(0,1),
c(bbar,bA) ~ normal(0,0.5),
Rho ~ lkj_corr(2),
Sigma ~ exponential(1)
) , data=dat_list, chains=4, cores=4)
precis(m1)
## mean sd 5.5% 94.5% n_eff Rhat4
## abar -0.68837298 0.09854957 -0.84099405 -0.5307090 1616.888 1.0010308
## bA 0.08208818 0.04896449 0.00404054 0.1602275 3347.417 0.9993477
## bbar 0.64238105 0.15683498 0.39174481 0.8887102 1236.946 1.0016384
precis(m1, depth=3, pars=c("Rho","Sigma"))
## mean sd 5.5% 94.5% n_eff Rhat4
## Rho[1,1] 1.0000000 0.000000e+00 1.0000000 1.0000000 NaN NaN
## Rho[1,2] -0.6583075 1.609158e-01 -0.8679235 -0.3723901 372.7440 1.011686
## Rho[2,1] -0.6583075 1.609158e-01 -0.8679235 -0.3723901 372.7440 1.011686
## Rho[2,2] 1.0000000 6.031563e-17 1.0000000 1.0000000 1924.8383 0.997998
## Sigma[1] 0.5776238 9.766819e-02 0.4289787 0.7437571 373.3351 1.013654
## Sigma[2] 0.7736993 2.042912e-01 0.4414220 1.1016613 152.9991 1.023648
m2 <- ulam( alist(
C ~ bernoulli( p ),
logit(p) <- a[did] + b[did]*urban + bK*children + bA*age,
c(a,b)[did] ~ multi_normal(c(abar,bbar), Rho, Sigma),
abar ~ normal(0,1),
c(bbar,bK,bA) ~ normal(0,0.5),
Rho ~ lkj_corr(2),
Sigma ~ exponential(1)
) , data=dat_list , chains=4 , cores=4 )
precis(m2)
## mean sd 5.5% 94.5% n_eff Rhat4
## abar -0.7237835 0.10611364 -0.8893651 -0.5502366 1365.9098 1.001529
## bA -0.2642294 0.07164849 -0.3740615 -0.1482292 2303.3172 0.999160
## bK 0.5123694 0.07396965 0.3921383 0.6301872 2082.7205 0.999374
## bbar 0.6938769 0.16495556 0.4291107 0.9546215 970.4132 1.000410
precis(m2, depth=3, pars=c("Rho","Sigma") )
## mean sd 5.5% 94.5% n_eff Rhat4
## Rho[1,1] 1.0000000 0.000000e+00 1.0000000 1.0000000 NaN NaN
## Rho[1,2] -0.6385627 1.660409e-01 -0.8513626 -0.3359315 338.0252 1.003863
## Rho[2,1] -0.6385627 1.660409e-01 -0.8513626 -0.3359315 338.0252 1.003863
## Rho[2,2] 1.0000000 6.403459e-17 1.0000000 1.0000000 1943.0153 0.997998
## Sigma[1] 0.6148830 1.010069e-01 0.4660185 0.7866580 512.6073 1.003642
## Sigma[2] 0.7831447 2.032106e-01 0.4769437 1.1324762 241.0120 1.026676