This chapter extended the basic multilevel strategy of partial pooling to slopes as well as intercepts. Accomplishing this meant modeling covariation in the statistical population of parameters. The LKJcorr prior was introduced as a convenient family of priors for correlation matrices. You saw how covariance models can be applied to causal inference, using instrumental variables and the front-door criterion. Gaussian processes represent a practical method of extending the varying effects strategy to continuous dimensions of similarity, such as spatial, network, phylogenetic, or any other abstract distance between entities in the data.
Place each answer inside the code chunk (grey box). Each code chunk should contain a text response or code that completes or answers the question or activity requested. Make sure to include plots if the question requests them.
Finally, upon completion, name your final output .html file YourName_ANLY505-Year-Semester.html, publish the assignment to your RPubs account, and submit the link to Canvas. Each question is worth 5 points.
14-1. Repeat the café robot simulation from the beginning of the chapter. This time, set rho to zero, so that there is no correlation between intercepts and slopes. Plot the posterior distribution. How does the posterior distribution of the correlation reflect this change in the underlying simulation?
library(MASS)        # mvrnorm, for simulating from a multivariate normal
library(rethinking)  # ulam, precis, extract.samples, dens, rangi2
a <- 3.5        # average morning wait time
b <- (-1)       # average difference for afternoon wait time
sigma_a <- 1    # std dev in intercepts
sigma_b <- 0.5  # std dev in slopes
rho <- 0        # correlation between intercepts and slopes (zero for this problem)
Mu <- c(a, b)
cov_ab <- sigma_a * sigma_b * rho
Sigma <- matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol=2)
# Simulation
N_cafes <- 20
set.seed(6)
vary_effects <- mvrnorm(N_cafes, Mu, Sigma)
a_cafe <- vary_effects[ ,1]
b_cafe <- vary_effects[ ,2]
N_visits <- 10
afternoon <- rep(0:1, N_visits * N_cafes/2)
cafe_id <- rep(1:N_cafes, each=N_visits)
mu <- a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm(N_visits * N_cafes, mu, sigma)
d <- data.frame(cafe=cafe_id, afternoon=afternoon, wait=wait)
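# Sanity check (a sketch): with rho = 0 the simulated intercepts and slopes
# should be roughly uncorrelated, up to sampling noise from only 20 cafes.
plot(a_cafe, b_cafe, xlab="intercept (a_cafe)", ylab="slope (b_cafe)")
round(cor(a_cafe, b_cafe), 2)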
# set.seed(42)
# m14.1 <- ulam(
#   alist(
#     wait ~ normal(mu, sigma),
#     mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
#     c(a_cafe, b_cafe)[cafe] ~ multi_normal(c(a, b), Rho, sigma_cafe),
#     a ~ normal(5, 2),
#     b ~ normal(-1, 0.5),
#     sigma_cafe ~ exponential(1),
#     sigma ~ exponential(1),
#     Rho ~ lkj_corr(2)
#   ), data=d, chains=4, cores=4, refresh=0
# )
#
# precis(m14.1, depth=3, pars="Rho")
#
# posterior <- extract.samples(m14.1)
# dens(posterior$Rho[ , 1, 2], xlab="correlation between intercepts and slopes")
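# To see how much the posterior reflects the rho = 0 simulation, overlay the
# LKJcorr(2) prior for the correlation (a sketch, continuing the commented
# code; rlkjcorr is from rethinking):
# R_prior <- rlkjcorr(1e4, K=2, eta=2)
# dens(R_prior[ , 1, 2], add=TRUE, lty=2)  # prior density, for comparison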
14-2. Fit this multilevel model to the simulated café data: \[\begin{align} W_i &\sim \text{Normal}(\mu_i, \sigma) \\ \mu_i &= \alpha_{\text{CAFE}[i]} + \beta_{\text{CAFE}[i]} A_i \\ \alpha_{\text{CAFE}} &\sim \text{Normal}(\alpha, \sigma_\alpha) \\ \beta_{\text{CAFE}} &\sim \text{Normal}(\beta, \sigma_\beta) \\ \alpha &\sim \text{Normal}(0, 10) \\ \beta &\sim \text{Normal}(0, 10) \\ \sigma, \sigma_\alpha, \sigma_\beta &\sim \text{Exponential}(1) \end{align}\] Use WAIC to compare this model to the model from the chapter, the one that uses a multivariate Gaussian prior. Visualize the differences between the posterior means. Explain the result.
a <- 3.5        # average morning wait time
b <- (-1)       # average difference for afternoon wait time
sigma_a <- 1    # std dev in intercepts
sigma_b <- 0.5  # std dev in slopes
rho <- -0.7     # correlation between intercepts and slopes, as in the chapter
Mu <- c(a, b)
cov_ab <- sigma_a * sigma_b * rho
Sigma <- matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol=2)
# Simulation
N_cafes <- 20
set.seed(42)
vary_effects <- mvrnorm(N_cafes, Mu, Sigma)
a_cafe <- vary_effects[ ,1]
b_cafe <- vary_effects[ ,2]
N_visits <- 10
afternoon <- rep(0:1, N_visits * N_cafes/2)
cafe_id <- rep(1:N_cafes, each=N_visits)
mu <- a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm(N_visits * N_cafes, mu, sigma)
d <- data.frame(cafe=cafe_id, afternoon=afternoon, wait=wait)
# set.seed(42)
# m14.2_base <- ulam(
#   alist(
#     wait ~ normal(mu, sigma),
#     mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
#     c(a_cafe, b_cafe)[cafe] ~ multi_normal(c(a, b), Rho, sigma_cafe),
#     a ~ normal(5, 2),
#     b ~ normal(-1, 0.5),
#     sigma_cafe ~ exponential(1),
#     sigma ~ exponential(1),
#     Rho ~ lkj_corr(2)
#   ), data=d, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
# )
#
# precis(m14.2_base)
#
# m14.2 <- ulam(
#   alist(
#     wait ~ dnorm(mu, sigma),
#     mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
#     a_cafe[cafe] ~ dnorm(a, sigma_alpha),
#     b_cafe[cafe] ~ dnorm(b, sigma_beta),
#     a ~ dnorm(0, 10),
#     b ~ dnorm(0, 10),
#     sigma ~ dexp(1),
#     sigma_alpha ~ dexp(1),
#     sigma_beta ~ dexp(1)
#   ),
#   data=d, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
# )
#
# # posterior means of the varying effects under each model
# a_base_mod <- apply(extract.samples(m14.2_base)$a_cafe, 2, mean)
# a_mod <- apply(extract.samples(m14.2)$a_cafe, 2, mean)
# b_base_mod <- apply(extract.samples(m14.2_base)$b_cafe, 2, mean)
# b_mod <- apply(extract.samples(m14.2)$b_cafe, 2, mean)
#
# compare(m14.2_base, m14.2)
#
# plot(a_mod, b_mod,
#      xlab="intercept", ylab="slope",
#      pch=15, col=rangi2, ylim=c(-1.5, -0.1))
# points(a_base_mod, b_base_mod, pch=1)
# legend("topright", pch=c(15, 1), col=c(rangi2, "black"),
#        legend=c("independent priors (m14.2)", "multivariate prior (m14.2_base)"))
14-3. Use WAIC to compare the Gaussian process model of Oceanic tools to the models fit to the same data in Chapter 11. Pay special attention to the effective numbers of parameters, as estimated by WAIC. Explain the result.
data(Kline)
d <- Kline
d$P <- scale(log(d$population))  # standardized log population (not used below; the scientific model takes raw population)
d$contact_id <- ifelse(d$contact == "high", 2, 1)
dat_11 <- list(T=d$total_tools, P=d$population, cid=d$contact_id)
d$society <- 1:10  # society index for the Gaussian process varying effects
data("islandsDistMatrix")
dat_gauss <- list(T=d$total_tools, P=d$population, society=d$society, Dmat=islandsDistMatrix)
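# A quick look at the distance matrix, in thousands of km, helps when
# interpreting the Gaussian process covariance below:
round(islandsDistMatrix, 1)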
# m11.11 <- ulam(
#   alist(
#     T ~ dpois(lambda),
#     lambda <- exp(a[cid]) * P^b[cid] / g,
#     a[cid] ~ dnorm(1, 1),
#     b[cid] ~ dexp(1),
#     g ~ dexp(1)
#   ),
#   data=dat_11, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
# )
#
# m14.3 <- ulam(
#   alist(
#     T ~ dpois(lambda),
#     lambda <- (a * P^b / g) * exp(k[society]),
#     vector[10]:k ~ multi_normal(0, SIGMA),
#     matrix[10, 10]:SIGMA <- cov_GPL2(Dmat, etasq, rhosq, 0.01),
#     c(a, b, g) ~ dexp(1),
#     etasq ~ dexp(2),
#     rhosq ~ dexp(0.5)
#   ),
#   data=dat_gauss, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
# )
#
# compare(m11.11, m14.3)
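# compare() reports the effective number of parameters in its pWAIC column;
# WAIC() shows the same penalty per model (a sketch, continuing the commented code):
# WAIC(m11.11)
# WAIC(m14.3)  # adaptive pooling of the society effects shrinks the effective parameter count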
14-4. Let’s revisit the Bangladesh fertility data, data(bangladesh), from the practice problems for Chapter 13. Fit a model with both varying intercepts by district_id and varying slopes of urban by district_id. You are still predicting use.contraception. Inspect the correlation between the intercepts and slopes. Can you interpret this correlation, in terms of what it tells you about the pattern of contraceptive use in the sample? Plot the mean (or median) varying effect estimates for both the intercepts and slopes, by district. Then you can visualize the correlation and maybe more easily think through what it means to have a particular correlation. Plot the predicted proportion of women using contraception, with urban women on one axis and rural on the other. Explain the results.
data(bangladesh)
d <- bangladesh
dat_list <- list(
  C = d$use.contraception,
  did = as.integer(as.factor(d$district)),  # re-index so district ids are contiguous
  urban = d$urban
)
#
# m_14.4 <- ulam(
#   alist(
#     C ~ bernoulli(p),
#     logit(p) <- a[did] + b[did] * urban,
#     c(a, b)[did] ~ multi_normal(c(abar, bbar), Rho, Sigma),
#     abar ~ normal(0, 1),
#     bbar ~ normal(0, 0.5),
#     Rho ~ lkj_corr(2),
#     Sigma ~ exponential(1)
#   ),
#   data = dat_list, chains = 4, cores = 4, iter = 4000
# )
#
# precis(m_14.4)
#
# precis(m_14.4, depth = 3, pars = c("Rho", "Sigma"))
#
# post <- extract.samples(m_14.4)
# a <- apply(post$a, 2, mean)
# b <- apply(post$b, 2, mean)
# plot(a, b, xlab = "a (intercept)", ylab = "b (urban slope)")
# abline(h = 0, lty = 2)
# abline(v = 0, lty = 2)
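# The question also asks for predicted proportions of contraceptive use,
# urban against rural, by district (a sketch continuing the commented code;
# inv_logit is from rethinking):
# p_rural <- apply(inv_logit(post$a), 2, mean)           # urban = 0
# p_urban <- apply(inv_logit(post$a + post$b), 2, mean)  # urban = 1
# plot(p_rural, p_urban, xlab = "rural contraceptive use",
#      ylab = "urban contraceptive use", xlim = c(0, 1), ylim = c(0, 1))
# abline(a = 0, b = 1, lty = 2)  # districts above the line have higher urban use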
14-5. Now consider the predictor variables age.centered and living.children, also contained in data(bangladesh). Suppose that age influences contraceptive use (changing attitudes) and number of children (older people have had more time to have kids). Number of children may also directly influence contraceptive use. Draw a DAG that reflects these hypothetical relationships. Then build models needed to evaluate the DAG. You will need at least two models. Retain district and urban, as in 14-4. What do you conclude about the causal influence of age and children?
library(dagitty)  # dagitty() and plot() for DAGs
dag_14.5 <- dagitty("dag{
  Age -> Contraception
  Age -> Number_of_Children
  Number_of_Children -> Contraception
}")
plot(dag_14.5)
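# dagitty can list the adjustment sets the DAG implies (a sketch): nothing
# needs adjusting for the total effect of age, while the direct effect
# requires conditioning on number of children.
adjustmentSets(dag_14.5, exposure="Age", outcome="Contraception", effect="total")
adjustmentSets(dag_14.5, exposure="Age", outcome="Contraception", effect="direct")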
data(bangladesh)
d <- bangladesh
dat_list <- list(
  C = d$use.contraception,
  did = as.integer(as.factor(d$district)),
  urban = d$urban
)
dat_list$children <- standardize(d$living.children)
dat_list$age <- standardize(d$age.centered)
#
# m14.5_age <- ulam(
#   alist(
#     C ~ bernoulli(p),
#     logit(p) <- a[did] + b[did] * urban + bA * age,
#     c(a, b)[did] ~ multi_normal(c(a_bar, b_bar), Rho, Sigma),
#     a_bar ~ normal(0, 1),
#     c(b_bar, bA) ~ normal(0, 0.5),
#     Rho ~ lkj_corr(2),
#     Sigma ~ exponential(1)
#   ),
#   data=dat_list, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
# )
#
# precis(m14.5_age)
#
# precis(m14.5_age, depth=3, pars=c("Rho", "Sigma"))
#
# m14.5_age_chil <- ulam(
#   alist(
#     C ~ bernoulli(p),
#     logit(p) <- a[did] + b[did] * urban + bA * age + bN * children,
#     c(a, b)[did] ~ multi_normal(c(a_bar, b_bar), Rho, Sigma),
#     a_bar ~ normal(0, 1),
#     c(b_bar, bA, bN) ~ normal(0, 0.5),
#     Rho ~ lkj_corr(2),
#     Sigma ~ exponential(1)
#   ),
#   data=dat_list, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
# )
#
# precis(m14.5_age_chil)
#
# precis(m14.5_age_chil, depth=3, pars=c("Rho", "Sigma"))
#
# compare(m14.5_age, m14.5_age_chil)
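# To separate the total effect of age from its direct effect, overlay the
# posterior of bA from both models (a sketch, continuing the commented code):
# post_age <- extract.samples(m14.5_age)
# post_both <- extract.samples(m14.5_age_chil)
# dens(post_age$bA, xlab="bA (effect of age)")
# dens(post_both$bA, add=TRUE, col=rangi2)  # direct effect, adjusting for children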