# Loading some useful packages
library(tidyverse)
library(kableExtra)
library(magrittr)
library(gridExtra)   # Combine multiple ggplots
library(invgamma)    # Inverse-Gamma distribution
library(ggrepel)     # Automatically position non-overlapping text labels with 'ggplot2'

Problem 1

(BDA3 ch5 Problem 15) Read BDA3 section 5.6 on meta-analysis. Implement the Bayesian analysis of the mortality data to reproduce the results in the textbook.
Meta-analysis: perform the computations for the meta-analysis data of Table 5.4.
Results of 22 clinical trials of beta-blockers for reducing mortality after myocardial infarction (first six studies shown)

| study | \(y_{0j}\) | \(n_{0j}\) | \(y_{1j}\) | \(n_{1j}\) | \(y_{j}\) | \(\sigma_{j}\) |
|-------|-----------|-----------|-----------|-----------|----------|---------------|
| 1 | 3 | 39 | 3 | 38 | 0.028 | 0.850 |
| 2 | 14 | 116 | 7 | 114 | -0.741 | 0.483 |
| 3 | 11 | 93 | 5 | 69 | -0.541 | 0.565 |
| 4 | 127 | 1520 | 102 | 1533 | -0.246 | 0.138 |
| 5 | 27 | 365 | 28 | 355 | 0.069 | 0.281 |
| 6 | 6 | 52 | 4 | 59 | -0.584 | 0.676 |

According to BDA3 section 5.6, let \[ \begin{align} \left\{ \begin{aligned} &n_{0j}: \text{subjects in the control group} \\ &n_{1j}: \text{subjects in the treatment group} \\ \end{aligned} \right.,\quad \left\{ \begin{aligned} &y_{0j}: \text{deaths in the control group} \\ &y_{1j}: \text{deaths in the treatment group} \\ \end{aligned} \right.,\quad \theta_{j}: \text{log odds ratio} \end{align} \] One approach is based on empirical logits: for each study \(j\), one can estimate \(\theta_{j}\) by \[ \begin{align} \left\{ \begin{aligned} &y_{j} = \log\left( \frac{y_{1j}}{n_{1j}-y_{1j}}\right) - \log\left( \frac{y_{0j}}{n_{0j}-y_{0j}}\right) \\ &\sigma_{j}^{2} = \frac{1}{y_{1j}} + \frac{1}{n_{1j}-y_{1j}} + \frac{1}{y_{0j}} + \frac{1}{n_{0j}-y_{0j}}\\ \end{aligned} \right. \end{align} \] Setting up the hierarchical normal model of section 5.4, \[ \begin{align} \left\{ \begin{aligned} & y_{j}|\theta_{j} \sim N(\theta_{j},\ \sigma_{j}^{2}) \\ & \theta_{j}|(\mu,\tau) \sim N(\mu,\ \tau^{2}) \\ & p(\mu,\ \tau) = p(\mu|\tau)*p(\tau) \propto p(\tau) \propto 1 \end{aligned} \right. \end{align} \] we can get
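The analyses below assume a data frame data.p1 holding, for each of the 22 studies, the empirical logit y_j and its standard deviation sigma_j (plus the group sizes n0 and n1 used in a later plot). A minimal sketch of how it can be built from the raw counts, using only the six studies shown above; the raw-count column names y0, n0, y1, n1 are assumptions consistent with the plotting code below.

    # Raw counts from Table 5.4 (first six of the 22 studies only)
    data.p1 <- data.frame(
      y0 = c(3, 14, 11, 127, 27, 6),
      n0 = c(39, 116, 93, 1520, 365, 52),
      y1 = c(3, 7, 5, 102, 28, 4),
      n1 = c(38, 114, 69, 1533, 355, 59)
    )
    # Empirical logits and their standard deviations, as defined above
    data.p1$y_j <- log(data.p1$y1 / (data.p1$n1 - data.p1$y1)) -
      log(data.p1$y0 / (data.p1$n0 - data.p1$y0))
    data.p1$sigma_j <- sqrt(1/data.p1$y1 + 1/(data.p1$n1 - data.p1$y1) +
                            1/data.p1$y0 + 1/(data.p1$n0 - data.p1$y0))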
< Joint posterior > \[ \begin{align} p(\theta,\mu,\tau^{2}|y) &\propto p(y|\theta) * p(\theta|\mu,\tau) * p(\mu,\tau) \\ &\propto \prod_{j=1}^{J} N(y_{j}|\theta_{j},\ \sigma_{j}^{2}) * \prod_{j=1}^{J} N(\theta_{j}|\mu,\ \tau^{2}) * p(\mu,\tau) \end{align} \] < Conditional posterior > \[ \begin{align} p(\theta|\mu,\tau,y) &\propto p(y|\theta)*p(\theta|\mu,\tau)*p(\mu,\tau) \\ \Rightarrow \theta_{j}|(\mu,\tau,y) &\sim N(\hat{\theta}_{j},\ V_{j}) \\ \text{where}\quad \hat{\theta}_{j} &= \frac{\frac{1}{\sigma_{j}^{2}}y_{j}+\frac{1}{\tau^{2}}\mu}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}}, \quad V_{j}= \frac{1}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \end{align} \] < Marginal posterior > \[ \begin{align} P(\mu,\tau|y) &\propto p(y|\mu,\tau) * p(\mu,\tau) \\ &\propto \prod_{j=1}^{J}N(y_{j}|\mu,\ \sigma_{j}^{2}+\tau^{2}) * p(\mu,\tau) \end{align} \]

  (a) Plot the posterior density of \(\tau\) over an appropriate range that includes essentially all of the posterior density, analogous to Figure 5.5.
    From the previous derivation we have \(P(\mu,\tau|y)\), which can be factored as \[ \begin{align} P(\mu,\tau|y) &= p(\mu|\tau,\ y) * p(\tau|y) \\ \Rightarrow p(\mu|\tau,\ y) &= \frac{P(\mu,\tau|y)}{p(\tau|y)} \propto P(\mu,\tau|y) \\ \Rightarrow \mu|(\tau,\ y) &\sim N(\hat{\mu},\ V_{\mu}) \\ \text{where}\quad \hat{\mu} &= \frac{\sum_{j=1}^{J} \frac{y_{j}}{\sigma_{j}^{2}+\tau^{2}}} {\sum_{j=1}^{J} \frac{1}{\sigma_{j}^{2}+\tau^{2}}},\quad V_{\mu} = \frac{1}{\sum_{j=1}^{J}\frac{1}{\sigma_{j}^{2}+\tau^{2}}} \end{align} \] so the marginal posterior of \(\tau\) is \[ \begin{align} p(\tau|y) &= \frac{P(\mu,\tau|y)}{p(\mu|\tau,y)} \\ &\propto \frac{\prod_{j=1}^{J}N(y_{j}|\mu,\ \sigma_{j}^{2}+\tau^{2}) * p(\tau)}{N(\mu|\hat{\mu},V_{\mu})} \\ &\propto p(\tau) * V_{\mu}^{1/2} * \prod_{j=1}^{J}(\sigma_{j}^{2}+\tau^{2})^{-1/2} \exp \left\{ \frac{-(y_{j}-\hat{\mu})^{2}}{2(\sigma_{j}^{2}+\tau^{2})} \right\} \end{align} \] This marginal posterior of \(\tau\) is plotted below; since \(\tau^{2}\) is the variance of the \(\theta_{j}\) in the hierarchical model, only positive values of \(\tau\) are considered.

    # Visualization of the posterior dist. of tau 
    ## Unnormalized marginal posterior density p(tau | y)
    tau_dist <- function(tau = 3){
      # Data: empirical logits and their variances
      y <- data.p1$y_j 
      sigma_2 <- data.p1$sigma_j^2
      # Conditional posterior quantities mu_hat and V_mu from above
      V_u <- 1 / sum( 1/(sigma_2 + tau^2) )
      mu_hat <- sum( y / ( sigma_2 + tau^2) ) / 
        sum( 1/ ( sigma_2 + tau^2 ) )
      # Evaluate the log density, then exponentiate
      z1 <- 0.5 * log(V_u)
      z2 <- -0.5 * log(sigma_2 + tau^2)
      z3 <- -((y - mu_hat)^2) / (2*(sigma_2 + tau^2) )
      z4 <- (z2 + z3) %>% sum
      value <- (z1 + z4) %>% exp()
      return(value)
    }
    # Visualization
    tau.plot <- data.frame(x = seq(0.01, 1, 
                                   length.out = 1000))
    tau.plot$y <- tau.plot %>% apply(., 1, tau_dist)
    tau.plot$y %<>%  divide_by(sum(tau.plot$y)) # Normalizing
    tau_plot <- ggplot(data = tau.plot, aes(x = x, y = y)) + 
      geom_line(color = 'black') + 
      labs(x = expression(tau), 
           y = expression(paste("p(", tau, "|y)")), 
           title = "Marginal posterior density"
           ) + 
      theme_grey(base_size = 13)
    tau_plot + xlim(0, 0.5) 
  (b) Produce graphs analogous to Figures 5.6 and 5.7 to display how the posterior means and standard deviations of the \(\theta_{j}\)'s depend on \(\tau\).
    We know \[ \begin{align} \theta_{j}|(\mu,\tau,y) &\sim N(\hat{\theta}_{j},\ V_{j}) \\ \text{where}\quad \hat{\theta}_{j} &= \frac{\frac{1}{\sigma_{j}^{2}}y_{j}+\frac{1}{\tau^{2}}\mu}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}}, \quad V_{j}= \frac{1}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \end{align} \] so the posterior means and standard deviations of the \(\theta_{j}\)'s depend on \(\tau\) (with \(\mu\) replaced by the \(\hat{\mu}\) from part (a)).

    # Posterior mean of theta_j given (tau, y); mu is replaced by mu_hat from (a)
    post.mean_fun <- function(tau, yj, sj_2){
      # mu_hat depends on tau and the full data
      y <- data.p1$y_j 
      sigma_2 <- data.p1$sigma_j^2
      mu_hat <- sum( y / ( sigma_2 + tau^2) ) / 
        sum( 1/ ( sigma_2 + tau^2 ) )
      # Posterior mean: precision-weighted average of y_j and mu_hat
      z1 <- (yj/sj_2) + mu_hat/(tau^2)
      z2 <- (1/sj_2) + (1/(tau^2))
      value <- z1/z2
      return(value)
    }
    
    # Posterior variance function
    post.var_fun <- function(tau, sj_2){
      z1 <- (1/sj_2) + (1/tau^2)
      return(1/z1)
    }
    # Build a grid of (tau, y_j, sigma_j^2) combinations for all 22 studies
    npoint <- 1000
    df.post <- data.frame(
      x = rep(seq(0.01, 1, length.out = npoint), 22), 
      y = rep(data.p1$y_j , each = npoint), 
      s = rep(data.p1$sigma_j^2 , each = npoint)
      )
    
    mean.value <- numeric(nrow(df.post))
    for (i in 1:nrow(df.post)) {
      mean.value[i] <- post.mean_fun(tau = df.post[i, 1], 
                                     yj = df.post[i, 2], 
                                     sj_2 = df.post[i, 3])
    }
    var.value <- numeric(nrow(df.post))
    for (i in 1:nrow(df.post)) {
      var.value[i] <- post.var_fun(tau = df.post[i, 1],
                                   sj_2 = df.post[i, 3])
    }
    
    df.post$mean <- mean.value
    df.post$sd <- var.value %>% sqrt
    df.post$class <- rep(seq(1,22), each = npoint)
    df.post <- df.post[, -c(2, 3)]
    # Visualization of conditional mean 
    cond.mean_plot <- ggplot(data = df.post, 
                             aes(x = x , y = mean, 
                                 group = class)) + 
      geom_line(aes(colour = class %>% as.factor())) + 
      labs(x = expression(tau), 
       y = expression(paste("E( ", theta[j]," | ", 
                            tau, ", ", y, " )"))) + 
      theme_grey(base_size = 13) + 
      theme(legend.position = "none")
    # Visualization of conditional variance 
    cond.sd_plot <- ggplot(data = df.post, 
                            aes(x = x , y = sd,
                                group = class)) + 
      geom_line(aes(colour = class %>% as.factor())) + 
      labs(x = expression(tau), 
       y = expression(paste("sd( ", theta[j]," | ", 
                            tau, ", ", y, " )"))) + 
      theme_grey(base_size = 13) + 
      theme(legend.position = "none")
    grid.arrange(cond.mean_plot, cond.sd_plot, 
                 ncol = 2)

    From the figures above, as \(\tau\) increases, the posterior means and standard deviations gradually converge; that is,

\[ \begin{align} & \hat{\theta}_{j} = \frac{\frac{1}{\sigma_{j}^{2}}y_{j}+\frac{1}{\tau^{2}}\mu}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \stackrel{\tau \rightarrow \infty}{\longrightarrow} y_{j}\\ & V_{j}= \frac{1}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \stackrel{\tau \rightarrow \infty}{\longrightarrow} \sigma_{j}^{2} \end{align} \]
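As a quick numeric check of these limits, a sketch reusing the helper functions above with study 1 as the example:

    # For large tau the posterior mean approaches y_1 = 0.028 ...
    post.mean_fun(tau = 100, yj = data.p1$y_j[1], sj_2 = data.p1$sigma_j[1]^2)
    # ... and the posterior variance approaches sigma_1^2 = 0.850^2
    post.var_fun(tau = 100, sj_2 = data.p1$sigma_j[1]^2)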

  (c) Produce a scatterplot of the crude effect estimates vs. the posterior median effect estimates of the 22 studies. Verify that the studies with the smallest sample sizes are partially pooled the most toward the mean.

    # Simulation function: for each study i and each posterior draw tau[j],
    # draw one theta_i from its conditional posterior given (mu_hat, tau[j])
    simu.cond.post.dist_fun <- function(tau){
      y <- data.p1$y_j
      s_2 <- data.p1$sigma_j^2
      cond.theta_list <- vector("list", length(y)) %>% 
      `names<-`(., paste("theta", seq(1, 22)) %>% 
                  gsub(" ", "_", .) )
      for (i in 1:length(y)) {
        for (j in 1:length(tau)) {
          cond.mu <- post.mean_fun(tau = tau[j], 
                                   yj = y[i], 
                                   sj_2 = s_2[i])
          cond.sd <- post.var_fun(tau = tau[j], 
                                   sj_2 = s_2[i]) %>% sqrt
          cond.theta_list[[i]][j] <- rnorm(1, mean = cond.mu, 
                                           sd = cond.sd)
        }
      }
      return(cond.theta_list)
    }
    # Simulate from the conditional posterior distribution
    set.seed(2019)
    tau.index <- sample(1:1000, 
                        size = 1000, replace = TRUE, 
                        prob = tau.plot$y)
    tau <- tau.plot[tau.index, ]$x # length 1000
    cond.post.median <- simu.cond.post.dist_fun(tau = tau) %>% 
      lapply(., median) %>% unlist
    # Precision-weighted pooled estimate (complete pooling)
    pooled.mean <- sum( data.p1$y_j/ (data.p1$sigma_j)^2) /
      sum(1/ (data.p1$sigma_j)^2)
    # Visualization 
    ggplot(data = NULL, 
           aes(x = data.p1$y_j, y = cond.post.median)) + 
      geom_point(aes(colour = (data.p1$n0 + data.p1$n1) ),
                 size = 3) + 
      labs(x = expression(y[j]),
           y = "posterior median", 
           color = "Sample size") + 
      geom_text_repel(aes(x = data.p1$y_j, 
                          y = cond.post.median, 
                          label =  seq(1, 22))) +
      geom_hline(yintercept = pooled.mean, 
                 col = "red", 
                 linetype = "dashed")  +
      scale_color_continuous(low = "lightblue", 
                             high = "#003366") + 
      theme_grey(base_size = 13)

    From the figure above, the posterior median of study 1 (the smallest sample size) is pulled very close to the pooled mean (red dashed line).

  (d) Draw simulations from the posterior distribution of a new treatment effect \(\tilde{\theta}_{j}\), and plot a histogram of the simulations.
    According to BDA3 p. 118, to obtain posterior predictive simulations of new data \(\tilde{y}\), perform the following steps:

    step 1: Draw \((\mu,\ \tau)\) from \(p(\mu,\tau|y) = p(\mu|\tau,y)*p(\tau|y)\)

    step 2: Draw \(\tilde{\theta}=(\tilde{\theta}_{1},\tilde{\theta}_{2},...,\tilde{\theta}_{\tilde{J}})\) from \(p(\tilde{\theta}_{j} |\mu,\tau)\), which is the prior distribution of \(\theta\) given the hyperparameters

    # Step 1: Draw (mu, tau) 
    nsample <- 5000
    set.seed(2019)
    # step 1-1 : first sample from p(tau | y)
    tau.plot <- data.frame(x = seq(0, 1, 
                                   length.out = 10000))
    tau.plot$y <- tau.plot %>% apply(., 1, tau_dist)
    tau.plot$y %<>%  divide_by(sum(tau.plot$y)) # Normalizing
    
    tau.index <- sample(1:10000, 
                        size = nsample, replace = TRUE, 
                        prob = tau.plot$y)
    tau <- tau.plot[tau.index, 1] 
    
    # step 1-2 : then sample from p(mu | tau, y)
    
    # Posterior mean mu_hat of p(mu | tau, y), from part (a)
    nu_hat <- function(tau = 0.2){
      z1 <- (data.p1$y_j/(data.p1$sigma_j^2 + tau^2)) %>% sum
      z2 <- (1/(data.p1$sigma_j^2 + tau^2)) %>% sum
      values <- z1/z2
      return(values)
      }
    
    # Posterior variance V_mu of p(mu | tau, y)
    Vu <- function(tau = 0.2){
      z1 <- (1/(data.p1$sigma_j^2 + tau^2)) %>% sum
      values <- 1/z1
      return(values)
      }
    
    mu <- as.numeric()
    for (i in 1:nsample) {
      mu[i] <- rnorm(1, 
                     mean = nu_hat(tau = tau[i]), 
                     sd = Vu(tau = tau[i]) %>% sqrt)
    }
    
    # Step 2 : sample from p(theta|mu, tau)
    pred.theta <- mapply(rnorm, 
                         n = rep(1, nsample), 
                         mean = mu,  
                         sd = tau)
    # Visualization 
    theta.info_table <- c(pred.theta %>% mean, 
                          pred.theta %>% var) %>% 
      round(., 4) %>% 
      matrix(., ncol = 1) %>% 
      `colnames<-`(., "Value") %>% 
      `rownames<-`(., c("Mean", "Var"))
    
    ggplot(data = NULL, aes(x = pred.theta)) + 
      geom_histogram(bins = 40, 
                      fill = 'light blue', color = 'black') + 
      labs(x = expression(tilde(theta)), 
           y = "", 
           title = "Histogram") + 
      annotation_custom(tableGrob(theta.info_table), 
                        xmin = 0.2, xmax = 0.5, 
                        ymin = 600, ymax = 800)

    The figure above shows the histogram of \(\tilde{\theta}\), with its posterior mean and variance in the inset table.

  (e) Given the simulations just obtained, draw simulated outcomes from replications of a hypothetical new experiment with 100 persons in each of the treated and control groups. Plot a histogram of the simulations of the crude estimated treatment effect (5.23) in the new experiment.
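    A minimal sketch, reusing pred.theta from part (d): the model describes only the log odds ratio, so a control-group death probability must be assumed; here the pooled control rate from Table 5.4 is used (columns y0 and n0 as in the sketch above), and the 0.5 continuity correction for zero cells is an added assumption.

    # Hypothetical new experiment: 100 persons in each of treated/control groups
    set.seed(2019)
    n.new <- 100
    p0 <- sum(data.p1$y0) / sum(data.p1$n0)  # assumed control death rate (pooled)
    p1 <- plogis(qlogis(p0) + pred.theta)    # treated rate implied by theta-tilde
    y0.new <- rbinom(length(pred.theta), size = n.new, prob = p0)
    y1.new <- rbinom(length(pred.theta), size = n.new, prob = p1)
    # Crude estimated treatment effect (5.23); 0.5 guards against zero cells
    crude <- log((y1.new + 0.5) / (n.new - y1.new + 0.5)) - 
      log((y0.new + 0.5) / (n.new - y0.new + 0.5))
    # Visualization 
    ggplot(data = NULL, aes(x = crude)) + 
      geom_histogram(bins = 40, 
                     fill = 'light blue', color = 'black') + 
      labs(x = "crude estimate (5.23)", 
           y = "", 
           title = "Histogram") + 
      theme_grey(base_size = 13)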

Problem 2

Suppose an electronic part has a lifetime modeled by an exponential distribution: \[ p(y|\theta)= \frac{1}{\theta}\exp\left\{\frac{-y}{\theta}\right\},\quad \text{with mean } \theta \text{ and variance } \theta^{2} \] A retailer gets two specimens of the part from each of 10 manufacturers.
Let \({\bf y}_{j} = (y_{j1},y_{j2})\) represent the lifetimes of the two specimens from the jth manufacturer, and \(\theta_{j}\) the corresponding exponential parameter. If we assume the manufacturers are exchangeable, then the \(\theta_{j}\) can be modeled as random samples from a population. A natural population distribution for \(\theta_{j}\) is the inverse-Gamma(\(\alpha\), \(\beta\)) distribution. The prior \(\pi(\alpha,\ \beta) \propto \beta^{-5/2}\) is used in the following analysis.
Let \({\bf Y} = ({\bf y}_{1}, {\bf y}_{2}, ..., {\bf y}_{10})\).

Data

| \(j\) | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|
| \(y_{j1}\) | 1.47 | 1.75 | 0.66 | 5.59 | 3.70 | 0.81 | 0.95 | 2.24 | 0.25 | 2.71 |
| \(y_{j2}\) | 0.25 | 5.58 | 0.61 | 0.54 | 8.42 | 0.05 | 1.21 | 2.03 | 2.82 | 0.62 |

According to the statements above, we can set up the hierarchical model \[ \begin{align} \left\{ \begin{aligned} & y_{j}|\theta_{j} \sim Exp(\theta_{j}) \\ & \theta_{j}|(\alpha,\beta) \sim Inv\text{-}gamma(\alpha,\ \beta) \\ & p(\alpha,\ \beta) \propto \beta^{-5/2} \end{aligned} \right. \end{align} \] (a) Derive the (unnormalized) joint posterior of (\(\alpha,\ \beta,\ \theta_{1},\ \theta_{2}, ...,\ \theta_{10}\)) given data \({\bf Y}\).
< Proof >
Let \(\underset{\tilde{}}{\theta}=(\theta_{1},\ \theta_{2}, ...,\ \theta_{10})\) \[ \begin{align} p(\alpha,\ \beta,\ \underset{\tilde{}}{\theta}| {\bf Y} ) &\propto p({\bf Y}|\underset{\tilde{}}{\theta})*p(\underset{\tilde{}}{\theta}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto \prod_{j=1}^{10}p({\bf y}_{j}|\theta_{j})* \prod_{j=1}^{10} p(\theta_{j}|\alpha,\ \beta)*\pi(\alpha,\ \beta),\quad \text{by exchangeability (de Finetti)} \\ &\propto \prod_{j=1}^{10} \left(\frac{1}{\theta_{j}}\right)^{2} * exp\left\{ \frac{-(y_{j1}+y_{j2})}{\theta_{j}} \right\} * \prod_{j=1}^{10} \frac{\beta^{\alpha}}{\Gamma(\alpha)}\theta_{j}^{-(\alpha+1)}exp\left\{\frac{-\beta}{\theta_{j}} \right\} * \frac{1}{\beta^{5/2}} \end{align} \] (b) Derive the conditional posterior given hyperparameters: \(p(\theta_{j} |\alpha,\ \beta, {\bf Y})\).
< Proof > \[ \begin{align} p(\underset{\tilde{}}{\theta}|\alpha,\ \beta, {\bf Y}) &\propto p(\underset{\tilde{}}{\theta},\ {\bf Y},\ \alpha,\ \beta) \\ &\propto p({\bf Y}|\underset{\tilde{}}{\theta},\ \alpha,\ \beta)*p(\underset{\tilde{}}{\theta}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto p({\bf Y}|\underset{\tilde{}}{\theta})*p(\underset{\tilde{}}{\theta}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto \prod_{j=1}^{10} \left(\frac{1}{\theta_{j}}\right)^{2} * exp\left\{ \frac{-(y_{j1}+y_{j2})}{\theta_{j}} \right\} * \prod_{j=1}^{10} \theta_{j}^{-(\alpha+1)}exp\left\{\frac{-\beta}{\theta_{j}} \right\} \\ &\propto \prod_{j=1}^{10} \theta_{j}^{-(\alpha+2+1)} exp\left\{ \frac{-(y_{j1}+y_{j2}+\beta)}{\theta_{j}} \right\} \end{align} \] From the equation above, the \(\theta_{j}\)'s are conditionally independent, so \[ \begin{align} \theta_{j}|(\alpha,\ \beta,\ {\bf Y}) \sim Inverse\text{-}Gamma(\alpha^{*} = \alpha+2,\ \beta^{*} = y_{j1}+y_{j2}+\beta) \end{align} \]
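The invgamma package is used below to draw from this distribution. As a quick sanity check of its parameterization (an assumption worth verifying: rinvgamma's rate should be the \(\beta\) in \(e^{-\beta/\theta}\)), the simulated mean should be close to \(\beta/(\alpha-1)\):

    # Sanity check of the Inverse-Gamma parameterization:
    # if theta ~ Inv-Gamma(shape = alpha, rate = beta), E[theta] = beta/(alpha - 1)
    set.seed(1)
    mean(rinvgamma(1e5, shape = 3, rate = 2))  # should be close to 2/(3 - 1) = 1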

  (c) Derive the (unnormalized) marginal posterior: \(p(\alpha,\ \beta|{\bf Y})\). Using grid approximation, you may plot \(p(\alpha,\ \beta|{\bf Y})\) in an image with contours.
    < Proof > \[ \begin{align} p(\alpha,\ \beta|{\bf Y}) \propto p({\bf Y}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \end{align} \] First, we find \(p({\bf Y}|\alpha,\ \beta)\) \[ \begin{align} p({\bf y_{j}}|\alpha,\ \beta) &= \int p({\bf y_{j}}|\theta_{j})*p(\theta_{j}|\alpha,\ \beta) d\theta_{j} \\ &= \int_{0}^{\infty} \left(\frac{1}{\theta_{j}}\right)^{2} * exp\left\{ \frac{-(y_{j1}+y_{j2})}{\theta_{j}} \right\} * \frac{\beta^{\alpha}}{\Gamma(\alpha)} \theta_{j}^{-(\alpha+1)}exp\left\{\frac{-\beta}{\theta_{j}} \right\} d\theta_{j} \\ &= \frac{\beta^{\alpha}}{\Gamma(\alpha)} \int_{0}^{\infty} \theta_{j}^{-(\alpha+2+1)} exp\left\{\frac{-(y_{j1}+y_{j2}+\beta)}{\theta_{j}}\right\} d\theta_{j},\quad \text{by the inverse-gamma integral} \\ &= \frac{\beta^{\alpha}}{\Gamma(\alpha)} \frac{\Gamma(\alpha+2)}{(y_{j1}+y_{j2}+\beta)^{\alpha+2}} \\ \Rightarrow p({\bf Y}|\alpha,\ \beta) &= \prod_{j=1}^{10} \frac{\beta^{\alpha}}{\Gamma(\alpha)} \frac{\Gamma(\alpha+2)}{(y_{j1}+y_{j2}+\beta)^{\alpha+2}} \end{align} \] So, \[ \begin{align} p(\alpha,\ \beta|{\bf Y}) &\propto p({\bf Y}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto \prod_{j=1}^{10} \frac{\beta^{\alpha}}{\Gamma(\alpha)} \frac{\Gamma(\alpha+2)}{(y_{j1}+y_{j2}+\beta)^{\alpha+2}}*\frac{1}{\beta^{5/2}} \end{align} \] We use a grid approximation (evaluated on the log scale for numerical stability, then exponentiated) to plot the marginal posterior distribution.

    # Data
    y_j1 <- c(1.47, 1.75, 0.66, 5.59, 3.70, 0.81, 0.95, 2.24, 0.25, 2.71)
    y_j2 <- c(0.25, 5.58, 0.61, 0.54, 8.42, 0.05, 1.21, 2.03, 2.82, 0.62)
    
    # Log (unnormalized) marginal posterior p(alpha, beta | Y)
    log.marginal <- function(alpha = 1, beta = 2){
      z1 <- beta^{alpha} / gamma(alpha)
      z2 <- gamma(alpha + 2) / (y_j1 + y_j2 + beta)^{alpha+2}
      z3 <- beta^{-5/2}
      value <- sum( log(z1 * z2) ) + log(z3)
      return(value)
      }
    
    # Grid approximation
    m <- 100
    alpha.seq <- seq(0.1, 4.5, length.out = m)
    beta.seq <- seq(0.1, 7, length.out = m)
    grid.value_marginal <- data.frame(alpha = rep(alpha.seq, 
                                                  each = m), 
                                      beta = rep(beta.seq, 
                                                 times = m))
    grid.value_marginal$density <- grid.value_marginal %>% 
      apply(., 1, FUN = function(x){ 
        log.marginal(alpha = x[1], beta = x[2])
        }) %>% exp()
    
    # Visualization
    marginal_dist <- ggplot(data = grid.value_marginal, 
                            aes(x = alpha, y = beta)) + 
      geom_raster(aes(fill = density), interpolate = TRUE) +
      geom_contour(aes(z = density), 
                   colour = "black", size = 0.3) + 
      scale_fill_gradientn(colours = c("white", "yellow", "red")) + 
      labs(title = "Marginal posterior evaluated in grid", 
           x = expression(alpha), y = expression(beta), 
           caption = "Contours plot") +
      theme_grey(base_size = 13) + 
      theme(legend.position = "none") 
    marginal_dist

  (d) Using the observed data, draw iid samples of (\(\alpha,\ \beta,\ \theta_{1},\ \theta_{2}, ...,\ \theta_{10}\)) from the joint posterior in (a) via a 2-step procedure:

    step 1: draw \((\alpha^{(i)},\ \beta^{(i)})\) from the grid approximation of \(p(\alpha,\ \beta|{\bf Y})\) in (c)

    step 2: draw \(\theta^{(i)}_{j}\) from \(p(\theta_{j} |\alpha^{(i)},\ \beta^{(i)}, {\bf Y})\) in (b) for j = 1,2,…,10.

    Summarize the posterior distribution of each parameter as follows, using the simulated random samples:

    # Step 1 : Sampling from the marginal posterior 
    set.seed(2019)
    nsample <- 1000  # number of samples
    y <- y_j1 + y_j2
    index <- sample(1:(m^2), 
                    size = nsample, replace = TRUE, 
                    prob = grid.value_marginal$density)
    hyper.parameter.table <- grid.value_marginal[index, ] %>% 
      .[, c(1, 2)] 
    
    # Step 2 : Sampling from conditional posterior
    theta.list <- vector("list", 10) %>% 
      `names<-`(., paste("theta", seq(1, 10)) %>% 
                  gsub(" ", "_", .) )
    
    for (i in 1:10) {
      # Conditional posterior from (b): Inv-Gamma(alpha + 2, y_j1 + y_j2 + beta)
      theta.list[[i]] <- rinvgamma(n = nsample, 
                                   shape = hyper.parameter.table$alpha + 2, 
                                   rate = y[i] + hyper.parameter.table$beta)
      }
    
    # Step 3 : Summarize 
    ## Function returning the mean, median, sd, and 90% posterior interval
    info_fun <- function(x){
      mu <- mean(x)
      med <- median(x)
      sd <- sd(x)
      # 5th and 95th percentiles as order statistics (assumes 1000 draws)
      LCI.90 <- x %>% sort %>% .[50]
      UCI.90 <- x %>% sort %>% .[950]
      return(c(mu, med, sd, LCI.90, UCI.90) )
      }
    
    rbind(
      hyper.parameter.table %>% apply(., 2, info_fun) %>% t, 
      theta.list %>% lapply(., info_fun) %>% unlist() %>% 
        matrix(., ncol = 5, byrow = TRUE) %>% 
        `row.names<-`(., paste("theta", seq(1, 10)) %>% 
                        gsub(" ", "_", .))
      ) %>% 
      `colnames<-`(., c("mean", "median", "sd", "90%_LCI", "90%_UCI")) %>% as.matrix() %>% 
      round(., 4) %>% 
      `rownames<-`(., c("$\\alpha$", "$\\beta$", 
                        "$\\theta_{1}$", "$\\theta_{2}$", 
                        "$\\theta_{3}$", "$\\theta_{4}$", 
                        "$\\theta_{5}$", "$\\theta_{6}$", 
                        "$\\theta_{7}$", "$\\theta_{8}$", 
                        "$\\theta_{9}$", "$\\theta_{10}$")) %>%
      kable(., "html", caption = "Summarization") %>%
      kable_styling(bootstrap_options = "striped", full_width = F) %>% 
      add_header_above(c("parameter", "posterior" = 5))
    Summarization
    
    | parameter | mean | median | sd | 90%_LCI | 90%_UCI |
    |---|---|---|---|---|---|
    | \(\alpha\) | 1.9836 | 1.7889 | 1.0258 | 0.6333 | 4.0111 |
    | \(\beta\) | 2.7126 | 2.4697 | 1.7704 | 0.4485 | 6.0939 |
    | \(\theta_{1}\) | 1.4356 | 1.1384 | 1.1486 | 0.4601 | 3.3190 |
    | \(\theta_{2}\) | 3.6089 | 2.7754 | 2.7927 | 1.2695 | 9.0403 |
    | \(\theta_{3}\) | 1.3184 | 1.0507 | 1.1194 | 0.4142 | 2.9606 |
    | \(\theta_{4}\) | 3.0714 | 2.4256 | 2.7744 | 1.1286 | 6.3759 |
    | \(\theta_{5}\) | 5.4554 | 4.0697 | 5.4203 | 1.8923 | 12.8729 |
    | \(\theta_{6}\) | 1.1484 | 0.9163 | 0.8438 | 0.3305 | 2.6624 |
    | \(\theta_{7}\) | 1.6310 | 1.2727 | 1.3922 | 0.5866 | 3.5593 |
    | \(\theta_{8}\) | 2.4265 | 1.8537 | 1.8691 | 0.8453 | 5.7407 |
    | \(\theta_{9}\) | 1.9728 | 1.4856 | 1.8676 | 0.6741 | 4.5243 |
    | \(\theta_{10}\) | 2.0881 | 1.6473 | 1.7587 | 0.7249 | 4.7117 |
  (e) Make a plot similar to Figure 5.4 in BDA3 but using \(\bar{y}_{j} = (y_{j1} + y_{j2})/2\) on the x-axis and the posterior mean of \(\theta_{j}\) together with the 90% posterior interval on the y-axis, and comment on it.

    y_bar <- (y_j1 + y_j2)/2
    theta_mean <- theta.list %>% lapply(., mean) %>% unlist
    CI_90 <- theta.list %>% lapply(., info_fun) %>% 
      unlist() %>% matrix(., ncol = 5, byrow = TRUE) %>% 
      .[, c(4, 5)]
    # Visualization
    ggplot(data = NULL, 
           aes(x = y_bar, y = theta_mean)) + 
      geom_point() + 
      geom_errorbar(aes(ymin = CI_90[, 1], 
                        ymax = CI_90[, 2])) + 
      geom_abline(intercept = 0, slope = 1, color = "red", 
                  linetype = "dashed") + 
      labs(x = expression(bar(y)[j]), 
           y = expression(paste("Posterior mean of ", theta[j] )), 
           caption = "90% posterior interval") + 
      geom_text_repel(aes(x = y_bar, y = theta_mean, 
                          label = seq_along(y_bar)) ) +
      theme_grey(base_size = 13)

    From the figure above, we observe:
    1. The 90% posterior intervals of the \(\theta_{j}\) all cover the 45-degree line (red, dashed), indicating that the Bayesian estimates of \(\theta_{j}\) are close to the crude estimates \(\bar{y}_{j}\).
    2. As \(\bar{y}_{j}\) increases, the 90% posterior interval of \(\theta_{j}\) widens.