# Loading some useful packages
library(tidyverse)
library(kableExtra)
library(magrittr)
library(gridExtra)   # Combine multiple ggplots
library(invgamma)    # Inverse-Gamma distribution
library(ggrepel)     # Automatically position non-overlapping text labels with 'ggplot2'

Problem 1

(BDA3 ch5 Problem 15) Read BDA3 section 5.6 on meta-analysis. Implement the Bayesian analysis of the mortality data to reproduce the results in the textbook.
Meta-analysis: perform the computations for the meta-analysis data of Table 5.4.
Results of 22 clinical trials of beta-blockers for reducing mortality after myocardial infarction (first six studies shown)

| study | \(y_{0j}\) | \(n_{0j}\) | \(y_{1j}\) | \(n_{1j}\) | \(y_{j}\) | \(\sigma_{j}\) |
|-------|-----------|-----------|-----------|-----------|----------|---------------|
| 1 | 3 | 39 | 3 | 38 | 0.028 | 0.850 |
| 2 | 14 | 116 | 7 | 114 | -0.741 | 0.483 |
| 3 | 11 | 93 | 5 | 69 | -0.541 | 0.565 |
| 4 | 127 | 1520 | 102 | 1533 | -0.246 | 0.138 |
| 5 | 27 | 365 | 28 | 355 | 0.069 | 0.281 |
| 6 | 6 | 52 | 4 | 59 | -0.584 | 0.676 |

According to BDA3 section 5.6, let \[ \begin{align} \left\{ \begin{aligned} &n_{0j}: \text{subjects in the control group} \\ &n_{1j}: \text{subjects in the treatment group} \\ \end{aligned} \right.,\quad \left\{ \begin{aligned} &y_{0j}: \text{deaths in the control group} \\ &y_{1j}: \text{deaths in the treatment group} \\ \end{aligned} \right.,\quad \theta_{j}: \text{log odds ratio} \end{align} \] One approach is based on empirical logits: for each study \(j\), one can estimate \(\theta_{j}\) by \[ \begin{align} \left\{ \begin{aligned} &y_{j} = \log\left( \frac{y_{1j}}{n_{1j}-y_{1j}}\right) - \log\left( \frac{y_{0j}}{n_{0j}-y_{0j}}\right) \\ &\sigma_{j}^{2} = \frac{1}{y_{1j}} + \frac{1}{n_{1j}-y_{1j}} + \frac{1}{y_{0j}} + \frac{1}{n_{0j}-y_{0j}}\\ \end{aligned} \right. \end{align} \] Setting up the hierarchical normal model of section 5.4, \[ \begin{align} \left\{ \begin{aligned} & y_{j}|\theta_{j} \sim N(\theta_{j},\ \sigma_{j}^{2}) \\ & \theta_{j}|(\mu,\tau) \sim N(\mu,\ \tau^{2}) \\ & p(\mu,\ \tau) = p(\mu|\tau)*p(\tau) \propto p(\tau) \propto 1 \end{aligned} \right. \end{align} \] we can get
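The analyses below assume a data frame data.p1 holding, for each of the 22 studies, the empirical logit y_j and its standard deviation sigma_j (plus the group sizes n0 and n1 used in a later plot). A minimal sketch of how it can be built from the raw counts, using only the six studies shown above; the raw-count column names y0, n0, y1, n1 are assumptions consistent with the plotting code below.

    # Raw counts from Table 5.4 (first six of the 22 studies only)
    data.p1 <- data.frame(
      y0 = c(3, 14, 11, 127, 27, 6),
      n0 = c(39, 116, 93, 1520, 365, 52),
      y1 = c(3, 7, 5, 102, 28, 4),
      n1 = c(38, 114, 69, 1533, 355, 59)
    )
    # Empirical logits and their standard deviations, as defined above
    data.p1$y_j <- log(data.p1$y1 / (data.p1$n1 - data.p1$y1)) -
      log(data.p1$y0 / (data.p1$n0 - data.p1$y0))
    data.p1$sigma_j <- sqrt(1/data.p1$y1 + 1/(data.p1$n1 - data.p1$y1) +
                            1/data.p1$y0 + 1/(data.p1$n0 - data.p1$y0))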
< Joint posterior > \[ \begin{align} p(\theta,\mu,\tau^{2}|y) &\propto p(y|\theta) * p(\theta|\mu,\tau) * p(\mu,\tau) \\ &\propto \prod_{j=1}^{J} N(y_{j}|\theta_{j},\ \sigma_{j}^{2}) * \prod_{j=1}^{J} N(\theta_{j}|\mu,\ \tau^{2}) * p(\mu,\tau) \end{align} \] < Conditional posterior > \[ \begin{align} p(\theta|\mu,\tau,y) &\propto p(y|\theta)*p(\theta|\mu,\tau)*p(\mu,\tau) \\ \Rightarrow \theta_{j}|(\mu,\tau,y) &\sim N(\hat{\theta}_{j},\ V_{j}) \\ \text{where}\quad \hat{\theta}_{j} &= \frac{\frac{1}{\sigma_{j}^{2}}y_{j}+\frac{1}{\tau^{2}}\mu}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}}, \quad V_{j}= \frac{1}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \end{align} \] < Marginal posterior > \[ \begin{align} P(\mu,\tau|y) &\propto p(y|\mu,\tau) * p(\mu,\tau) \\ &\propto \prod_{j=1}^{J}N(y_{j}|\mu,\ \sigma_{j}^{2}+\tau^{2}) * p(\mu,\tau) \end{align} \]

  (a) Plot the posterior density of \(\tau\) over an appropriate range that includes essentially all of the posterior density, analogous to Figure 5.5.
    From the previous derivation we have \(P(\mu,\tau|y)\), which can be factored as \[ \begin{align} P(\mu,\tau|y) &= p(\mu|\tau,\ y) * p(\tau|y) \\ \Rightarrow p(\mu|\tau,\ y) &= \frac{P(\mu,\tau|y)}{p(\tau|y)} \propto P(\mu,\tau|y) \\ \Rightarrow \mu|(\tau,\ y) &\sim N(\hat{\mu},\ V_{\mu}) \\ \text{where}\quad \hat{\mu} &= \frac{\sum_{j=1}^{J} \frac{y_{j}}{\sigma_{j}^{2}+\tau^{2}}} {\sum_{j=1}^{J} \frac{1}{\sigma_{j}^{2}+\tau^{2}}},\quad V_{\mu} = \frac{1}{\sum_{j=1}^{J}\frac{1}{\sigma_{j}^{2}+\tau^{2}}} \end{align} \] so the marginal posterior of \(\tau\) is \[ \begin{align} p(\tau|y) &= \frac{P(\mu,\tau|y)}{p(\mu|\tau,y)} \\ &\propto \frac{\prod_{j=1}^{J}N(y_{j}|\mu,\ \sigma_{j}^{2}+\tau^{2}) * p(\tau)}{N(\mu|\hat{\mu},V_{\mu})} \\ &\propto p(\tau) * V_{\mu}^{1/2} * \prod_{j=1}^{J}(\sigma_{j}^{2}+\tau^{2})^{-1/2} \exp \left\{ \frac{-(y_{j}-\hat{\mu})^{2}}{2(\sigma_{j}^{2}+\tau^{2})} \right\} \end{align} \] This marginal posterior of \(\tau\) is plotted below; since \(\tau^{2}\) is the variance of the \(\theta_{j}\) in the hierarchical model, only positive values of \(\tau\) are considered.

    # Visualization of the posterior dist. of tau 
    ## Unnormalized marginal posterior density p(tau | y)
    tau_dist <- function(tau = 3){
      # Data: empirical logits and their variances
      y <- data.p1$y_j 
      sigma_2 <- data.p1$sigma_j^2
      # Conditional posterior quantities mu_hat and V_mu from above
      V_u <- 1 / sum( 1/(sigma_2 + tau^2) )
      mu_hat <- sum( y / ( sigma_2 + tau^2) ) / 
        sum( 1/ ( sigma_2 + tau^2 ) )
      # Evaluate the log density, then exponentiate
      z1 <- 0.5 * log(V_u)
      z2 <- -0.5 * log(sigma_2 + tau^2)
      z3 <- -((y - mu_hat)^2) / (2*(sigma_2 + tau^2) )
      z4 <- (z2 + z3) %>% sum
      value <- (z1 + z4) %>% exp()
      return(value)
    }
    # Visualization
    tau.plot <- data.frame(x = seq(0.01, 1, 
                                   length.out = 1000))
    tau.plot$y <- tau.plot %>% apply(., 1, tau_dist)
    tau.plot$y %<>%  divide_by(sum(tau.plot$y)) # Normalizing
    tau_plot <- ggplot(data = tau.plot, aes(x = x, y = y)) + 
      geom_line(color = 'black') + 
      labs(x = expression(tau), 
           y = expression(paste("p(", tau, "|y)")), 
           title = "Marginal posterior density"
           ) + 
      theme_grey(base_size = 13)
    tau_plot + xlim(0, 0.5) 
  (b) Produce graphs analogous to Figures 5.6 and 5.7 to display how the posterior means and standard deviations of the \(\theta_{j}\)'s depend on \(\tau\).
    We know \[ \begin{align} \theta_{j}|(\mu,\tau,y) &\sim N(\hat{\theta}_{j},\ V_{j}) \\ \text{where}\quad \hat{\theta}_{j} &= \frac{\frac{1}{\sigma_{j}^{2}}y_{j}+\frac{1}{\tau^{2}}\mu}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}}, \quad V_{j}= \frac{1}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \end{align} \] so the posterior means and standard deviations of the \(\theta_{j}\)'s depend on \(\tau\) (with \(\mu\) replaced by the \(\hat{\mu}\) from part (a)).

    # Posterior mean of theta_j given (tau, y); mu is replaced by mu_hat from (a)
    post.mean_fun <- function(tau, yj, sj_2){
      # mu_hat depends on tau and the full data
      y <- data.p1$y_j 
      sigma_2 <- data.p1$sigma_j^2
      mu_hat <- sum( y / ( sigma_2 + tau^2) ) / 
        sum( 1/ ( sigma_2 + tau^2 ) )
      # Posterior mean: precision-weighted average of y_j and mu_hat
      z1 <- (yj/sj_2) + mu_hat/(tau^2)
      z2 <- (1/sj_2) + (1/(tau^2))
      value <- z1/z2
      return(value)
    }
    
    # Posterior variance function
    post.var_fun <- function(tau, sj_2){
      z1 <- (1/sj_2) + (1/tau^2)
      return(1/z1)
    }
    # Build a grid of (tau, y_j, sigma_j^2) combinations for all 22 studies
    npoint <- 1000
    df.post <- data.frame(
      x = rep(seq(0.01, 1, length.out = npoint), 22), 
      y = rep(data.p1$y_j , each = npoint), 
      s = rep(data.p1$sigma_j^2 , each = npoint)
      )
    
    mean.value <- numeric(nrow(df.post))
    for (i in 1:nrow(df.post)) {
      mean.value[i] <- post.mean_fun(tau = df.post[i, 1], 
                                     yj = df.post[i, 2], 
                                     sj_2 = df.post[i, 3])
    }
    var.value <- numeric(nrow(df.post))
    for (i in 1:nrow(df.post)) {
      var.value[i] <- post.var_fun(tau = df.post[i, 1],
                                   sj_2 = df.post[i, 3])
    }
    
    df.post$mean <- mean.value
    df.post$sd <- var.value %>% sqrt
    df.post$class <- rep(seq(1,22), each = npoint)
    df.post <- df.post[, -c(2, 3)]
    # Visualization of conditional mean 
    cond.mean_plot <- ggplot(data = df.post, 
                             aes(x = x , y = mean, 
                                 group = class)) + 
      geom_line(aes(colour = class %>% as.factor())) + 
      labs(x = expression(tau), 
       y = expression(paste("E( ", theta[j]," | ", 
                            tau, ", ", y, " )"))) + 
      theme_grey(base_size = 13) + 
      theme(legend.position = "none")
    # Visualization of conditional variance 
    cond.sd_plot <- ggplot(data = df.post, 
                            aes(x = x , y = sd,
                                group = class)) + 
      geom_line(aes(colour = class %>% as.factor())) + 
      labs(x = expression(tau), 
       y = expression(paste("sd( ", theta[j]," | ", 
                            tau, ", ", y, " )"))) + 
      theme_grey(base_size = 13) + 
      theme(legend.position = "none")
    grid.arrange(cond.mean_plot, cond.sd_plot, 
                 ncol = 2)

    From the figures above, as \(\tau\) increases, the posterior means and standard deviations gradually converge; that is,

\[ \begin{align} & \hat{\theta}_{j} = \frac{\frac{1}{\sigma_{j}^{2}}y_{j}+\frac{1}{\tau^{2}}\mu}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \stackrel{\tau \rightarrow \infty}{\longrightarrow} y_{j}\\ & V_{j}= \frac{1}{\frac{1}{\sigma_{j}^{2}} + \frac{1}{\tau^{2}}} \stackrel{\tau \rightarrow \infty}{\longrightarrow} \sigma_{j}^{2} \end{align} \]
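As a quick numeric check of these limits, a sketch reusing the helper functions above with study 1 as the example:

    # For large tau the posterior mean approaches y_1 = 0.028 ...
    post.mean_fun(tau = 100, yj = data.p1$y_j[1], sj_2 = data.p1$sigma_j[1]^2)
    # ... and the posterior variance approaches sigma_1^2 = 0.850^2
    post.var_fun(tau = 100, sj_2 = data.p1$sigma_j[1]^2)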

  (c) Produce a scatterplot of the crude effect estimates vs. the posterior median effect estimates of the 22 studies. Verify that the studies with the smallest sample sizes are partially pooled the most toward the mean.

    # Simulation function: for each study i and each posterior draw tau[j],
    # draw one theta_i from its conditional posterior given (mu_hat, tau[j])
    simu.cond.post.dist_fun <- function(tau){
      y <- data.p1$y_j
      s_2 <- data.p1$sigma_j^2
      cond.theta_list <- vector("list", length(y)) %>% 
      `names<-`(., paste("theta", seq(1, 22)) %>% 
                  gsub(" ", "_", .) )
      for (i in 1:length(y)) {
        for (j in 1:length(tau)) {
          cond.mu <- post.mean_fun(tau = tau[j], 
                                   yj = y[i], 
                                   sj_2 = s_2[i])
          cond.sd <- post.var_fun(tau = tau[j], 
                                   sj_2 = s_2[i]) %>% sqrt
          cond.theta_list[[i]][j] <- rnorm(1, mean = cond.mu, 
                                           sd = cond.sd)
        }
      }
      return(cond.theta_list)
    }
    # Simulate from the conditional posterior distribution
    set.seed(2019)
    tau.index <- sample(1:1000, 
                        size = 1000, replace = TRUE, 
                        prob = tau.plot$y)
    tau <- tau.plot[tau.index, ]$x # length 1000
    cond.post.median <- simu.cond.post.dist_fun(tau = tau) %>% 
      lapply(., median) %>% unlist
    # Precision-weighted pooled estimate (complete pooling)
    pooled.mean <- sum( data.p1$y_j/ (data.p1$sigma_j)^2) /
      sum(1/ (data.p1$sigma_j)^2)
    # Visualization 
    ggplot(data = NULL, 
           aes(x = data.p1$y_j, y = cond.post.median)) + 
      geom_point(aes(colour = (data.p1$n0 + data.p1$n1) ),
                 size = 3) + 
      labs(x = expression(y[j]),
           y = "posterior median", 
           color = "Sample size") + 
      geom_text_repel(aes(x = data.p1$y_j, 
                          y = cond.post.median, 
                          label =  seq(1, 22))) +
      geom_hline(yintercept = pooled.mean, 
                 col = "red", 
                 linetype = "dashed")  +
      scale_color_continuous(low = "lightblue", 
                             high = "#003366") + 
      theme_grey(base_size = 13)

    From the figure above, the posterior median of study 1 (the smallest sample size) is pulled very close to the pooled mean (red dashed line).

  (d) Draw simulations from the posterior distribution of a new treatment effect \(\tilde{\theta}_{j}\), and plot a histogram of the simulations.
    According to BDA3 p. 118, to obtain posterior predictive simulations of new data \(\tilde{y}\), perform the following steps:

    step 1: Draw \((\mu,\ \tau)\) from \(p(\mu,\tau|y) = p(\mu|\tau,y)*p(\tau|y)\)

    step 2: Draw \(\tilde{\theta}=(\tilde{\theta}_{1},\tilde{\theta}_{2},...,\tilde{\theta}_{\tilde{J}})\) from \(p(\tilde{\theta}_{j} |\mu,\tau)\), which is the prior distribution of \(\theta\) given the hyperparameters

    # Step 1: Draw (mu, tau) 
    nsample <- 5000
    set.seed(2019)
    # step 1-1 : first sample from p(tau | y)
    tau.plot <- data.frame(x = seq(0, 1, 
                                   length.out = 10000))
    tau.plot$y <- tau.plot %>% apply(., 1, tau_dist)
    tau.plot$y %<>%  divide_by(sum(tau.plot$y)) # Normalizing
    
    tau.index <- sample(1:10000, 
                        size = nsample, replace = TRUE, 
                        prob = tau.plot$y)
    tau <- tau.plot[tau.index, 1] 
    
    # step 1-2 : then sample from p(mu | tau, y)
    
    # Posterior mean mu_hat of p(mu | tau, y), from part (a)
    nu_hat <- function(tau = 0.2){
      z1 <- (data.p1$y_j/(data.p1$sigma_j^2 + tau^2)) %>% sum
      z2 <- (1/(data.p1$sigma_j^2 + tau^2)) %>% sum
      values <- z1/z2
      return(values)
      }
    
    # Posterior variance V_mu of p(mu | tau, y)
    Vu <- function(tau = 0.2){
      z1 <- (1/(data.p1$sigma_j^2 + tau^2)) %>% sum
      values <- 1/z1
      return(values)
      }
    
    mu <- as.numeric()
    for (i in 1:nsample) {
      mu[i] <- rnorm(1, 
                     mean = nu_hat(tau = tau[i]), 
                     sd = Vu(tau = tau[i]) %>% sqrt)
    }
    
    # Step 2 : sample from p(theta|mu, tau)
    pred.theta <- mapply(rnorm, 
                         n = rep(1, nsample), 
                         mean = mu,  
                         sd = tau)
    # Visualization 
    theta.info_table <- c(pred.theta %>% mean, 
                          pred.theta %>% var) %>% 
      round(., 4) %>% 
      matrix(., ncol = 1) %>% 
      `colnames<-`(., "Value") %>% 
      `rownames<-`(., c("Mean", "Var"))
    
    ggplot(data = NULL, aes(x = pred.theta)) + 
      geom_histogram(bins = 40, 
                      fill = 'light blue', color = 'black') + 
      labs(x = expression(tilde(theta)), 
           y = "", 
           title = "Histogram") + 
      annotation_custom(tableGrob(theta.info_table), 
                        xmin = 0.2, xmax = 0.5, 
                        ymin = 600, ymax = 800)

    The figure above shows the histogram of \(\tilde{\theta}\), with its posterior mean and variance in the inset table.

  (e) Given the simulations just obtained, draw simulated outcomes from replications of a hypothetical new experiment with 100 persons in each of the treated and control groups. Plot a histogram of the simulations of the crude estimated treatment effect (5.23) in the new experiment.
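    A minimal sketch, reusing pred.theta from part (d): the model describes only the log odds ratio, so a control-group death probability must be assumed; here the pooled control rate from Table 5.4 is used (columns y0 and n0 as in the sketch above), and the 0.5 continuity correction for zero cells is an added assumption.

    # Hypothetical new experiment: 100 persons in each of treated/control groups
    set.seed(2019)
    n.new <- 100
    p0 <- sum(data.p1$y0) / sum(data.p1$n0)  # assumed control death rate (pooled)
    p1 <- plogis(qlogis(p0) + pred.theta)    # treated rate implied by theta-tilde
    y0.new <- rbinom(length(pred.theta), size = n.new, prob = p0)
    y1.new <- rbinom(length(pred.theta), size = n.new, prob = p1)
    # Crude estimated treatment effect (5.23); 0.5 guards against zero cells
    crude <- log((y1.new + 0.5) / (n.new - y1.new + 0.5)) - 
      log((y0.new + 0.5) / (n.new - y0.new + 0.5))
    # Visualization 
    ggplot(data = NULL, aes(x = crude)) + 
      geom_histogram(bins = 40, 
                     fill = 'light blue', color = 'black') + 
      labs(x = "crude estimate (5.23)", 
           y = "", 
           title = "Histogram") + 
      theme_grey(base_size = 13)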

Problem 2

Suppose an electronic part has a lifetime modeled by an exponential distribution: \[ p(y|\theta)= \frac{1}{\theta}\exp\left\{\frac{-y}{\theta}\right\},\quad \text{with mean } \theta \text{ and variance } \theta^{2} \] A retailer gets two specimens of the part from each of 10 manufacturers.
Let \({\bf y}_{j} = (y_{j1},y_{j2})\) represent the lifetimes of the two specimens from the jth manufacturer, and \(\theta_{j}\) the corresponding exponential parameter. If we assume the manufacturers are exchangeable, then the \(\theta_{j}\) can be modeled as random samples from a population. A natural population distribution for \(\theta_{j}\) is the inverse-Gamma(\(\alpha\), \(\beta\)) distribution. The prior \(\pi(\alpha,\ \beta) \propto \beta^{-5/2}\) is used in the following analysis.
Let \({\bf Y} = ({\bf y}_{1}, {\bf y}_{2}, ..., {\bf y}_{10})\).

Data

| \(j\) | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|
| \(y_{j1}\) | 1.47 | 1.75 | 0.66 | 5.59 | 3.70 | 0.81 | 0.95 | 2.24 | 0.25 | 2.71 |
| \(y_{j2}\) | 0.25 | 5.58 | 0.61 | 0.54 | 8.42 | 0.05 | 1.21 | 2.03 | 2.82 | 0.62 |

According to the statements above, we can set up the hierarchical model \[ \begin{align} \left\{ \begin{aligned} & y_{j}|\theta_{j} \sim Exp(\theta_{j}) \\ & \theta_{j}|(\alpha,\beta) \sim Inv\text{-}gamma(\alpha,\ \beta) \\ & p(\alpha,\ \beta) \propto \beta^{-5/2} \end{aligned} \right. \end{align} \] (a) Derive the (unnormalized) joint posterior of (\(\alpha,\ \beta,\ \theta_{1},\ \theta_{2}, ...,\ \theta_{10}\)) given data \({\bf Y}\).
< Proof >
Let \(\underset{\tilde{}}{\theta}=(\theta_{1},\ \theta_{2}, ...,\ \theta_{10})\) \[ \begin{align} p(\alpha,\ \beta,\ \underset{\tilde{}}{\theta}| {\bf Y} ) &\propto p({\bf Y}|\underset{\tilde{}}{\theta})*p(\underset{\tilde{}}{\theta}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto \prod_{j=1}^{10}p({\bf y}_{j}|\theta_{j})* \prod_{j=1}^{10} p(\theta_{j}|\alpha,\ \beta)*\pi(\alpha,\ \beta),\quad \text{by exchangeability (de Finetti)} \\ &\propto \prod_{j=1}^{10} \left(\frac{1}{\theta_{j}}\right)^{2} * exp\left\{ \frac{-(y_{j1}+y_{j2})}{\theta_{j}} \right\} * \prod_{j=1}^{10} \frac{\beta^{\alpha}}{\Gamma(\alpha)}\theta_{j}^{-(\alpha+1)}exp\left\{\frac{-\beta}{\theta_{j}} \right\} * \frac{1}{\beta^{5/2}} \end{align} \] (b) Derive the conditional posterior given hyperparameters: \(p(\theta_{j} |\alpha,\ \beta, {\bf Y})\).
< Proof > \[ \begin{align} p(\underset{\tilde{}}{\theta}|\alpha,\ \beta, {\bf Y}) &\propto p(\underset{\tilde{}}{\theta},\ {\bf Y},\ \alpha,\ \beta) \\ &\propto p({\bf Y}|\underset{\tilde{}}{\theta},\ \alpha,\ \beta)*p(\underset{\tilde{}}{\theta}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto p({\bf Y}|\underset{\tilde{}}{\theta})*p(\underset{\tilde{}}{\theta}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto \prod_{j=1}^{10} \left(\frac{1}{\theta_{j}}\right)^{2} * exp\left\{ \frac{-(y_{j1}+y_{j2})}{\theta_{j}} \right\} * \prod_{j=1}^{10} \theta_{j}^{-(\alpha+1)}exp\left\{\frac{-\beta}{\theta_{j}} \right\} \\ &\propto \prod_{j=1}^{10} \theta_{j}^{-(\alpha+2+1)} exp\left\{ \frac{-(y_{j1}+y_{j2}+\beta)}{\theta_{j}} \right\} \end{align} \] From the equation above, the \(\theta_{j}\)'s are conditionally independent, so \[ \begin{align} \theta_{j}|(\alpha,\ \beta,\ {\bf Y}) \sim Inverse\text{-}Gamma(\alpha^{*} = \alpha+2,\ \beta^{*} = y_{j1}+y_{j2}+\beta) \end{align} \]
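The invgamma package is used below to draw from this distribution. As a quick sanity check of its parameterization (an assumption worth verifying: rinvgamma's rate should be the \(\beta\) in \(e^{-\beta/\theta}\)), the simulated mean should be close to \(\beta/(\alpha-1)\):

    # Sanity check of the Inverse-Gamma parameterization:
    # if theta ~ Inv-Gamma(shape = alpha, rate = beta), E[theta] = beta/(alpha - 1)
    set.seed(1)
    mean(rinvgamma(1e5, shape = 3, rate = 2))  # should be close to 2/(3 - 1) = 1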

  (c) Derive the (unnormalized) marginal posterior: \(p(\alpha,\ \beta|{\bf Y})\). Using grid approximation, you may plot \(p(\alpha,\ \beta|{\bf Y})\) in an image with contours.
    < Proof > \[ \begin{align} p(\alpha,\ \beta|{\bf Y}) \propto p({\bf Y}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \end{align} \] First, we find \(p({\bf Y}|\alpha,\ \beta)\) \[ \begin{align} p({\bf y_{j}}|\alpha,\ \beta) &= \int p({\bf y_{j}}|\theta_{j})*p(\theta_{j}|\alpha,\ \beta) d\theta_{j} \\ &= \int_{0}^{\infty} \left(\frac{1}{\theta_{j}}\right)^{2} * exp\left\{ \frac{-(y_{j1}+y_{j2})}{\theta_{j}} \right\} * \frac{\beta^{\alpha}}{\Gamma(\alpha)} \theta_{j}^{-(\alpha+1)}exp\left\{\frac{-\beta}{\theta_{j}} \right\} d\theta_{j} \\ &= \frac{\beta^{\alpha}}{\Gamma(\alpha)} \int_{0}^{\infty} \theta_{j}^{-(\alpha+2+1)} exp\left\{\frac{-(y_{j1}+y_{j2}+\beta)}{\theta_{j}}\right\} d\theta_{j},\quad \text{by the inverse-gamma integral} \\ &= \frac{\beta^{\alpha}}{\Gamma(\alpha)} \frac{\Gamma(\alpha+2)}{(y_{j1}+y_{j2}+\beta)^{\alpha+2}} \\ \Rightarrow p({\bf Y}|\alpha,\ \beta) &= \prod_{j=1}^{10} \frac{\beta^{\alpha}}{\Gamma(\alpha)} \frac{\Gamma(\alpha+2)}{(y_{j1}+y_{j2}+\beta)^{\alpha+2}} \end{align} \] So, \[ \begin{align} p(\alpha,\ \beta|{\bf Y}) &\propto p({\bf Y}|\alpha,\ \beta)*\pi(\alpha,\ \beta) \\ &\propto \prod_{j=1}^{10} \frac{\beta^{\alpha}}{\Gamma(\alpha)} \frac{\Gamma(\alpha+2)}{(y_{j1}+y_{j2}+\beta)^{\alpha+2}}*\frac{1}{\beta^{5/2}} \end{align} \] We use a grid approximation (evaluated on the log scale for numerical stability, then exponentiated) to plot the marginal posterior distribution.

    # Data
    y_j1 <- c(1.47, 1.75, 0.66, 5.59, 3.70, 0.81, 0.95, 2.24, 0.25, 2.71)
    y_j2 <- c(0.25, 5.58, 0.61, 0.54, 8.42, 0.05, 1.21, 2.03, 2.82, 0.62)
    
    # Log (unnormalized) marginal posterior p(alpha, beta | Y)
    log.marginal <- function(alpha = 1, beta = 2){
      z1 <- beta^{alpha} / gamma(alpha)
      z2 <- gamma(alpha + 2) / (y_j1 + y_j2 + beta)^{alpha+2}
      z3 <- beta^{-5/2}
      value <- sum( log(z1 * z2) ) + log(z3)
      return(value)
      }
    
    # Grid approximation
    m <- 100
    alpha.seq <- seq(0.1, 4.5, length.out = m)
    beta.seq <- seq(0.1, 7, length.out = m)
    grid.value_marginal <- data.frame(alpha = rep(alpha.seq, 
                                                  each = m), 
                                      beta = rep(beta.seq, 
                                                 times = m))
    grid.value_marginal$density <- grid.value_marginal %>% 
      apply(., 1, FUN = function(x){ 
        log.marginal(alpha = x[1], beta = x[2])
        }) %>% exp()
    
    # Visualization
    marginal_dist <- ggplot(data = grid.value_marginal, 
                            aes(x = alpha, y = beta)) + 
      geom_raster(aes(fill = density), interpolate = TRUE) +
      geom_contour(aes(z = density), 
                   colour = "black", size = 0.3) + 
      scale_fill_gradientn(colours = c("white", "yellow", "red")) + 
      labs(title = "Marginal posterior evaluated in grid", 
           x = expression(alpha), y = expression(beta), 
           caption = "Contours plot") +
      theme_grey(base_size = 13) + 
      theme(legend.position = "none") 
    marginal_dist

  (d) Using the observed data, draw iid samples of (\(\alpha,\ \beta,\ \theta_{1},\ \theta_{2}, ...,\ \theta_{10}\)) from the joint posterior in (a) via a 2-step procedure:

    step 1: draw \((\alpha^{(i)},\ \beta^{(i)})\) from the grid approximation of \(p(\alpha,\ \beta|{\bf Y})\) in (c)

    step 2: draw \(\theta^{(i)}_{j}\) from \(p(\theta_{j} |\alpha^{(i)},\ \beta^{(i)}, {\bf Y})\) in (b) for j = 1,2,…,10.

    Summarize the posterior distribution of each parameter as follows, using the simulated random samples:

    # Step 1 : Sampling from the marginal posterior 
    set.seed(2019)
    nsample <- 1000  # number of samples
    y <- y_j1 + y_j2
    index <- sample(1:(m^2), 
                    size = nsample, replace = TRUE, 
                    prob = grid.value_marginal$density)
    hyper.parameter.table <- grid.value_marginal[index, ] %>% 
      .[, c(1, 2)] 
    
    # Step 2 : Sampling from conditional posterior
    theta.list <- vector("list", 10) %>% 
      `names<-`(., paste("theta", seq(1, 10)) %>% 
                  gsub(" ", "_", .) )
    
    for (i in 1:10) {
      # Conditional posterior from (b): Inv-Gamma(alpha + 2, y_j1 + y_j2 + beta)
      theta.list[[i]] <- rinvgamma(n = nsample, 
                                   shape = hyper.parameter.table$alpha + 2, 
                                   rate = y[i] + hyper.parameter.table$beta)
      }
    
    # Step 3 : Summarize 
    ## Function returning the mean, median, sd, and 90% posterior interval
    info_fun <- function(x){
      mu <- mean(x)
      med <- median(x)
      sd <- sd(x)
      # 5th and 95th percentiles as order statistics (assumes 1000 draws)
      LCI.90 <- x %>% sort %>% .[50]
      UCI.90 <- x %>% sort %>% .[950]
      return(c(mu, med, sd, LCI.90, UCI.90) )
      }
    
    rbind(
      hyper.parameter.table %>% apply(., 2, info_fun) %>% t, 
      theta.list %>% lapply(., info_fun) %>% unlist() %>% 
        matrix(., ncol = 5, byrow = TRUE) %>% 
        `row.names<-`(., paste("theta", seq(1, 10)) %>% 
                        gsub(" ", "_", .))
      ) %>% 
      `colnames<-`(., c("mean", "median", "sd", "90%_LCI", "90%_UCI")) %>% as.matrix() %>% 
      round(., 4) %>% 
      `rownames<-`(., c("$\\alpha$", "$\\beta$", 
                        "$\\theta_{1}$", "$\\theta_{2}$", 
                        "$\\theta_{3}$", "$\\theta_{4}$", 
                        "$\\theta_{5}$", "$\\theta_{6}$", 
                        "$\\theta_{7}$", "$\\theta_{8}$", 
                        "$\\theta_{9}$", "$\\theta_{10}$")) %>%
      kable(., "html", caption = "Summarization") %>%
      kable_styling(bootstrap_options = "striped", full_width = F) %>% 
      add_header_above(c("parameter", "posterior" = 5))
    Summarization
    
    | parameter | mean | median | sd | 90%_LCI | 90%_UCI |
    |---|---|---|---|---|---|
    | \(\alpha\) | 1.9836 | 1.7889 | 1.0258 | 0.6333 | 4.0111 |
    | \(\beta\) | 2.7126 | 2.4697 | 1.7704 | 0.4485 | 6.0939 |
    | \(\theta_{1}\) | 1.4356 | 1.1384 | 1.1486 | 0.4601 | 3.3190 |
    | \(\theta_{2}\) | 3.6089 | 2.7754 | 2.7927 | 1.2695 | 9.0403 |
    | \(\theta_{3}\) | 1.3184 | 1.0507 | 1.1194 | 0.4142 | 2.9606 |
    | \(\theta_{4}\) | 3.0714 | 2.4256 | 2.7744 | 1.1286 | 6.3759 |
    | \(\theta_{5}\) | 5.4554 | 4.0697 | 5.4203 | 1.8923 | 12.8729 |
    | \(\theta_{6}\) | 1.1484 | 0.9163 | 0.8438 | 0.3305 | 2.6624 |
    | \(\theta_{7}\) | 1.6310 | 1.2727 | 1.3922 | 0.5866 | 3.5593 |
    | \(\theta_{8}\) | 2.4265 | 1.8537 | 1.8691 | 0.8453 | 5.7407 |
    | \(\theta_{9}\) | 1.9728 | 1.4856 | 1.8676 | 0.6741 | 4.5243 |
    | \(\theta_{10}\) | 2.0881 | 1.6473 | 1.7587 | 0.7249 | 4.7117 |
  (e) Make a plot similar to Figure 5.4 in BDA3 but using \(\bar{y}_{j} = (y_{j1} + y_{j2})/2\) on the x-axis and the posterior mean of \(\theta_{j}\) together with the 90% posterior interval on the y-axis, and comment on it.

    y_bar <- (y_j1 + y_j2)/2
    theta_mean <- theta.list %>% lapply(., mean) %>% unlist
    CI_90 <- theta.list %>% lapply(., info_fun) %>% 
      unlist() %>% matrix(., ncol = 5, byrow = TRUE) %>% 
      .[, c(4, 5)]
    # Visualization
    ggplot(data = NULL, 
           aes(x = y_bar, y = theta_mean)) + 
      geom_point() + 
      geom_errorbar(aes(ymin = CI_90[, 1], 
                        ymax = CI_90[, 2])) + 
      geom_abline(intercept = 0, slope = 1, color = "red", 
                  linetype = "dashed") + 
      labs(x = expression(bar(y)[j]), 
           y = expression(paste("Posterior mean of ", theta[j] )), 
           caption = "90% posterior interval") + 
      geom_text_repel(aes(x = y_bar, y = theta_mean, 
                          label = seq_along(y_bar)) ) +
      theme_grey(base_size = 13)

    From the figure above, we observe:
    1. The 90% posterior intervals of the \(\theta_{j}\) all cover the 45-degree line (red, dashed), indicating that the Bayesian estimates of \(\theta_{j}\) are close to the crude estimates \(\bar{y}_{j}\).
    2. As \(\bar{y}_{j}\) increases, the 90% posterior interval of \(\theta_{j}\) widens.