Assignment #11

Questions

14-1. Repeat the café robot simulation from the beginning of the chapter. This time, set rho to zero, so that there is no correlation between intercepts and slopes. Plot the posterior distribution. How does the posterior distribution of the correlation reflect this change in the underlying simulation?

library(MASS)
library(ellipse)

# Simulate the population
a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 # std dev in intercepts
sigma_b <- 0.5 # std dev in slopes
rho <- 0 # correlation between intercepts and slopes

# Create relevant variables
Mu <- c(a, b)
cov_ab <- sigma_a * sigma_b * rho
Sigma <- matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol=2)

# Simulation
N_cafes <- 20
set.seed(6)
vary_effects <- mvrnorm(N_cafes, Mu, Sigma)
a_cafe <- vary_effects[ ,1]
b_cafe <- vary_effects[ ,2]

N_visits <- 10
afternoon <- rep(0:1, N_visits * N_cafes/2)
cafe_id <- rep(1:N_cafes, each=N_visits)
mu <- a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma <- 0.5  # std dev within cafes
wait <- rnorm(N_visits * N_cafes, mu, sigma)
d <- data.frame(cafe=cafe_id, afternoon=afternoon, wait=wait)

# Model
set.seed(42)
m14.1 <- ulam(
  alist(
    wait ~ normal(mu, sigma),
    mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
    c(a_cafe,b_cafe)[cafe] ~ multi_normal(c(a,b), Rho, sigma_cafe),
    a ~ normal(5, 2),
    b ~ normal(-1, 0.5),
    sigma_cafe ~ exponential(1),
    sigma ~ exponential(1),
    Rho ~ lkj_corr(2)
  ), data=d, chains=4, cores=4, refresh=0
)

precis(m14.1, pars="Rho[1, 2]")

##              result
## mean     0.01875514
## sd       0.23532935
## 5.5%    -0.36305534
## 94.5%    0.39079610
## n_eff 1900.18424871
## Rhat     0.99876875

# Plot posterior distribution
posterior <- extract.samples(m14.1)
dens(posterior$Rho[ , 1, 2])
legend("topright",
       pch=15,
       legend=c("Rho"))

# The posterior distribution of the correlation reflects this change because now the posterior for rho centers around 0. This, of course, is by design since we set rho to be equal to 0.

14-2. Fit this multilevel model to the simulated café data: \[\begin{align} \ W_i ∼ Normal(μ_i, σ) \\ \ μ_i = α_{CAFE[i]} + β_{CAFE[i]}A_i \\ \ α_{CAFE} ∼ Normal(α, σ_α) \\ \ β_{CAFE} ∼ Normal(β, σ_β) \\ \ α ∼ Normal(0, 10) \\ \ β ∼ Normal(0, 10) \\ \ σ, σ_α, σ_β ∼ Exponential(1) \\ \end{align}\] Use WAIC to compare this model to the model from the chapter, the one that uses a multi-variate Gaussian prior. Visualize the differences between the posterior means. Explain the result.

# Simulate the population
a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 # std dev in intercepts
sigma_b <- 0.5 # std dev in slopes
rho <- -0.7 # correlation between intercepts and slopes

# Create relevant variables
Mu <- c(a, b)
cov_ab <- sigma_a * sigma_b * rho
Sigma <- matrix(c(sigma_a^2, cov_ab, cov_ab, sigma_b^2), ncol=2)

# Simulation
N_cafes <- 20
set.seed(42)
vary_effects <- mvrnorm(N_cafes, Mu, Sigma)
a_cafe <- vary_effects[ ,1]
b_cafe <- vary_effects[ ,2]

N_visits <- 10
afternoon <- rep(0:1, N_visits * N_cafes/2)
cafe_id <- rep(1:N_cafes, each=N_visits)
mu <- a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma <- 0.5  # std dev within cafes
wait <- rnorm(N_visits * N_cafes, mu, sigma)
d <- data.frame(cafe=cafe_id, afternoon=afternoon, wait=wait)

# Base model from chapter
set.seed(42)
m14.2_base <- ulam(
  alist(
    wait ~ normal(mu, sigma),
    mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
    c(a_cafe,b_cafe)[cafe] ~ multi_normal(c(a,b), Rho, sigma_cafe),
    a ~ normal(5, 2),
    b ~ normal(-1, 0.5),
    sigma_cafe ~ exponential(1),
    sigma ~ exponential(1),
    Rho ~ lkj_corr(2)
  ), data=d, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

precis(m14.2_base)

##             mean         sd       5.5%      94.5%    n_eff     Rhat4
## a      3.3361545 0.32022138  2.8323511  3.8531714 4219.538 0.9996885
## b     -0.7640929 0.11785851 -0.9527759 -0.5761716 3445.447 1.0002171
## sigma  0.4640983 0.02562056  0.4252650  0.5076492 3362.908 1.0001821

# Model without correlation parameter
m14.2 <- ulam(
  alist(
    wait ~ dnorm(mu, sigma),
    mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
    a_cafe[cafe] ~ dnorm(a, sigma_alpha),
    b_cafe[cafe] ~ dnorm(b, sigma_beta),
    a ~ dnorm(0, 10),
    b ~ dnorm(0, 10),
    sigma ~ dexp(1),
    sigma_alpha ~ dexp(1),
    sigma_beta ~ dexp(1)
  ),
  data=d, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

# Extract posterior results
# Intercepts
a_base_mod <- apply(extract.samples(m14.2_base)$a_cafe, 2, mean)
a_mod <- apply(extract.samples(m14.2)$a_cafe, 2, mean)

# Slopes
b_base_mod <- apply(extract.samples(m14.2_base)$b_cafe, 2, mean)
b_mod <- apply(extract.samples(m14.2)$b_cafe, 2, mean)

# Compare results
compare(m14.2_base, m14.2)

##                WAIC       SE    dWAIC      dSE    pWAIC    weight
## m14.2_base 295.2996 21.14359 0.000000       NA 31.64082 0.6931406
## m14.2      296.9293 20.77566 1.629686 2.006231 32.10835 0.3068594

# Plot difference between the posterior means
plot(a_mod, b_mod,
     xlab="Intercept", ylab="Slope",
     pch=15, col=rangi2, ylim=c(-1.5, -0.1))
points(a_base_mod, b_base_mod, pch=1)

# Based on the WAIC we see that the value between the base model (correlation-informed) and the no correlation model is similar. This is corroborated by the plot of the posterior means. We see that the correlation-informed model is not too far off from the no correlation model. That said, the correlation-informed model should be more accurate since it takes into account information that is missing from the no correlation model.

14-3. Use WAIC to compare the Gaussian process model of Oceanic tools to the models fit to the same data in Chapter 11. Pay special attention to the effective numbers of parameters, as estimated by WAIC. Explain the result.

# Data
data(Kline)
d <- Kline

# From chapter 11
d$P <- scale(log(d$population))
d$contact_id <- ifelse(d$contact == "high", 2, 1)
dat_11 <- list(T=d$total_tools, P=d$population, cid=d$contact_id)

# Gaussian process model
d$society <- 1:10
data("islandsDistMatrix")
dat_gauss <- list(T=d$total_tools, P=d$population, society=d$society, Dmat=islandsDistMatrix)

# Model
m11.11 <- ulam(
  alist(
    T ~ dpois(lambda),
    lambda <- exp(a[cid]) * P^b[cid] / g,
    a[cid] ~ dnorm(1, 1),
    b[cid] ~ dexp(1),
    g ~ dexp(1)
  ),
  data=dat_11, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

m14.3 <- ulam(
  alist(
    T ~ dpois(lambda),
    lambda <- (a * P^b / g) * exp(k[society]),
    vector[10]:k ~ multi_normal(0, SIGMA),
    matrix[10, 10]:SIGMA <- cov_GPL2(Dmat, etasq, rhosq, 0.01),
    c(a, b, g) ~ dexp(1),
    etasq ~ dexp(2),
    rhosq ~ dexp(0.5)
  ),
  data=dat_gauss, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

# Compare results
compare(m11.11, m14.3)

##            WAIC        SE    dWAIC      dSE    pWAIC      weight
## m14.3  67.75565  2.323249  0.00000       NA 4.161317 0.997875113
## m11.11 80.05946 11.075104 12.30382 11.16785 4.808338 0.002124887

# The model with the Gaussian process prior has a lower WAIC. Additionally we see that the Gaussian process model has less effective parameters. This could be an indication of regularization on the model with the Gaussian process prior.

14-4. Let’s revisit the Bangladesh fertility data, data(bangladesh), from the practice problems for Chapter 13. Fit a model with both varying intercepts by district_id and varying slopes of urban by district_id. You are still predicting use.contraception. Inspect the correlation between the intercepts and slopes. Can you interpret this correlation, in terms of what it tells you about the pattern of contraceptive use in the sample? Plot the mean (or median) varying effect estimates for both the intercepts and slopes, by district. Then you can visualize the correlation and maybe more easily think through what it means to have a particular correlation. Plot the predicted proportion of women using contraception, with urban women on one axis and rural on the other. Explain the results.

# Data
data(bangladesh)
d <- bangladesh
dat_list <- list(
  C = d$use.contraception,
  did=as.integer(as.factor(d$district)),
  urban=d$urban
)

# Model
m14.4 <- ulam(
  alist(
    C ~ bernoulli(p),
    logit(p) <- a[did] + b[did] * urban,
    c(a, b)[did] ~ multi_normal(c(a_bar, b_bar), Rho, Sigma),
    a_bar ~ normal(0, 1),
    b_bar ~ normal(0, 0.5),
    Rho ~ lkj_corr(2),
    Sigma ~ exponential(1)
  ),
  dat=dat_list, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

precis(m14.4)

##             mean        sd       5.5%      94.5%    n_eff     Rhat4
## a_bar -0.6878679 0.1009263 -0.8463173 -0.5270973 2462.498 1.0001702
## b_bar  0.6411844 0.1601882  0.3895893  0.9048042 2355.963 0.9997593

# b_bar has positive value, which means that contraception is used more amongst the urban population

precis(m14.4, depth=2, pars=c("Rho", "Sigma"))

##               mean         sd      5.5%     94.5%    n_eff    Rhat4
## Sigma[1] 0.5788722 0.09928925 0.4297144 0.7441117 816.1521 1.005156
## Sigma[2] 0.7743619 0.21589201 0.4334737 1.1082418 276.7083 1.011742

# Negative correlative between intercepts and slopes

# Plot
posterior <- extract.samples(m14.4)
a <- apply(posterior$a, 2, mean)
b <- apply(posterior$b, 2, mean)
R <- apply(posterior$Rho, 2:3, mean)
s <- apply(posterior$Sigma, 2, mean)
S <-diag(s) %*% R %*% diag(s)

plot(a, b, col=rangi2, xlab="a (Intercept)", ylab="b (Slope - Urban)")
for (l in c(0.5, 0.67, 0.89, 0.97)) {
  lines(ellipse(S, centre= c(mean(posterior$a_bar), mean(posterior$b_bar)),level=l), col=col.alpha("black",0.2))
}

# Areas with high usage of contraception which are not urban have a smaller slope, and vice versa. This means that areas with high contraceptive usage does not differ much between urban and non-urban areas.

14-5 Now consider the predictor variables age.centered and living.children, also contained in data(bangladesh). Suppose that age influences contraceptive use (changing attitudes) and number of children (older people have had more time to have kids). Number of children may also directly influence contraceptive use. Draw a DAG that reflects these hypothetical relationships. Then build models needed to evaluate the DAG. You will need at least two models. Retain district and urban, as in 14-4. What do you conclude about the causal influence of age and children?

# Draw DAG
dag_14.5 <- dagitty("dag{
                    Contraception <- Age
                    Number_of_Children <- Age
                    Contraception <- Number_of_Children
                    }"
            )
plot(dag_14.5)

# Data
data(bangladesh)
d <- bangladesh
dat_list <- list(
  C = d$use.contraception,
  did=as.integer(as.factor(d$district)),
  urban=d$urban
)
dat_list$children <- standardize(d$living.children)
dat_list$age <- standardize(d$age.centered)

# Model with age
m14.5_age <- ulam(
  alist(
    C ~ bernoulli(p),
    logit(p) <- a[did] + b[did] * urban + bA * age,
    c(a, b)[did] ~ multi_normal(c(a_bar, b_bar), Rho, Sigma),
    a_bar ~ normal(0, 1),
    c(b_bar, bA) ~ normal(0, 0.5),
    Rho ~ lkj_corr(2),
    Sigma ~ exponential(1)
  ),
  dat=dat_list, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

precis(m14.5_age)

##              mean         sd         5.5%      94.5%    n_eff     Rhat4
## a_bar -0.68956087 0.09781286 -0.845336355 -0.5341903 3330.752 0.9998721
## bA     0.08400588 0.04991807  0.003319606  0.1654923 6637.447 0.9996921
## b_bar  0.64035589 0.16025871  0.381645435  0.8939028 2036.378 1.0012235

precis(m14.5_age, depth=2, pars=c("Rho", "Sigma"))

##               mean         sd      5.5%     94.5%     n_eff    Rhat4
## Sigma[1] 0.5772453 0.09556097 0.4342694 0.7407226 1084.7371 1.000375
## Sigma[2] 0.7829233 0.20691397 0.4556329 1.1160162  331.7157 1.011440

# From here we see that age has a positive effect and is small. Contraception usage is higher as age increase.

# Model with age and number of children
m14.5_age_chil <- ulam(
  alist(
    C ~ bernoulli(p),
    logit(p) <- a[did] + b[did] * urban + bA * age + bN * children,
    c(a, b)[did] ~ multi_normal(c(a_bar, b_bar), Rho, Sigma),
    a_bar ~ normal(0, 1),
    c(b_bar, bA, bN) ~ normal(0, 0.5),
    Rho ~ lkj_corr(2),
    Sigma ~ exponential(1)
  ),
  dat=dat_list, chains=4, cores=4, iter=2000, log_lik=TRUE, refresh=0
)

precis(m14.5_age_chil)

##             mean         sd       5.5%      94.5%    n_eff     Rhat4
## a_bar -0.7160364 0.10209092 -0.8803664 -0.5573534 2945.039 1.0001517
## bN     0.5123331 0.07150175  0.3987077  0.6279361 3886.804 0.9998692
## bA    -0.2630735 0.07082144 -0.3758186 -0.1516876 4582.690 0.9994240
## b_bar  0.6896799 0.16067994  0.4328458  0.9445034 1888.957 1.0040296

precis(m14.5_age_chil, depth=2, pars=c("Rho", "Sigma"))

##               mean        sd      5.5%     94.5%     n_eff    Rhat4
## Sigma[1] 0.6085725 0.1027031 0.4566243 0.7768011 1169.5920 1.002468
## Sigma[2] 0.7775375 0.2058840 0.4665425 1.1216542  383.3598 1.002112

# Compare results
compare(m14.5_age, m14.5_age_chil)

##                    WAIC       SE    dWAIC      dSE    pWAIC      weight
## m14.5_age_chil 2413.032 30.36106  0.00000       NA 55.00186 1.00000e+00
## m14.5_age      2467.014 27.94485 53.98224 14.16972 52.78222 1.89629e-12

# Number of children has an impact on contraception usage as age increases. Additionally, as usage of contraception decrease, the number of children increases, which make sense. Overall, WAIC is lower for the model that includes both age and number of children.

Assignment #11

Chapter 14

Saurabh Nehete

2023-02-15

Chapter 14 - Adventures in Covariance

Questions