# Installing some useful packags 
library(tidyverse)
library(kableExtra)
library(gridExtra)
library(ggrepel)
library(ggfortify)
library(magrittr)
library(gganimate)
library(tscount)    # Analysis of Count Time Series
library(mvtnorm)    # Multivariate normal 
# Bayesian packages 
library(rstan)
# library(devtools)   # Download frow Github
# install_github('nathanvan/rstanmulticore')
library(rstanmulticore)  # Parallel rstan
library(bayesplot)   # Plotting posterior analysis,

1 About stan

Stan has a modeling language, which is similar to but not identical to that of the Bayesian graphical modeling package BUGS (Lunn et al. 2000). A parser translates a model expressed in the Stan language to C++ code, whereupon it is compiled to an executable program and loaded as a Dynamic Shared Object (DSO) in R which can then be called by the user.

2 Hamiltonian Monte Carlo (HMC)

< HMC-Algorithm > \[ \begin{align} &*\ Define\ N,\ \epsilon,\ L,\ M \\ &*\ Set\ a\ initial\ value\ x^{(0)},\ and\ \phi^{(0)} \sim N(0,M) \\ &*\ For\ t=1,2...N \\ &\quad \quad 1.\ Leapfrog \\ &\quad \quad \quad \left\{ \begin{aligned} &\phi \leftarrow \phi + \frac{1}{2} \epsilon \frac{dlogP(\theta|y)}{d\theta} \\ &For\ l=1,2...L \\ &\quad \theta \leftarrow \theta + \epsilon M^{-1} \phi \\ &\quad \phi \leftarrow \phi + \frac{1}{2} \epsilon \frac{dlogP(\theta|y)}{d\theta} \\ &\phi \leftarrow \phi + \frac{1}{2} \epsilon \frac{dlogP(\theta|y)}{d\theta} \end{aligned} \right. \\ &\quad \quad \quad \rightarrow (\theta^{*},\ \phi^{*}) \\ &\quad \quad 2.\ Calculate\ r \\ &\quad \quad \quad r = \frac{P(\theta^{*}|y)*P(\phi^{*})}{P(\theta^{(t-1)}|y)*P(\phi^{(t-1)})} \\ &\quad \quad 3.\ Update \\ &\quad \quad \quad \theta^{(t)} = \left\{ \begin{aligned} &\theta^{*},\ with\ probability\ min(r,\ 1) \\ &\theta^{(t-1)},\ o.w \end{aligned} \right. \end{align} \] 簡言之,HMC在更新樣本點的時候考慮了梯度(gradient),故在高維的抽樣時,其效率會叫Random-Walk 快

Example : Bivariate normal
If we want to simulate bivariate normal \[ \begin{align} {\bf x} = \begin{pmatrix} x_{1} \\ x_{2} \end{pmatrix} \sim N\left( {\bf \mu} = \begin{pmatrix} 0 \\ 0 \end{pmatrix} , \Sigma = \begin{pmatrix} 1 & 0.8 \\ 0.8 & 1 \end{pmatrix} \right) \end{align} \]

# HMC - algorithm
binorm.HMC_fun <- function(x.int = c(-3, 3),
                           mu = c(0, 0),
                           sigma = matrix(c(1, 0.8, 0.8, 1), nrow = 2),
                           epsilon = 0.1, L = 10, 
                           N = 3000){
  # Define some function 
  log_post <- function(x){   #  -log posterior 
    inv_sigma <- solve(sigma)
    value <- t(x - mu) %*% inv_sigma %*% (x - mu)/2
    return(value)
  }
  
  gradient <- function(x){               # -gradient
    inv_sigma <- solve(sigma)
    value <- inv_sigma %*% (x - mu)
    return(value)
  }
  
  HMC_fun <- function(x_int,  # Initial 
                      U,     # log posterior 
                      grad_U , # graidnet
                      epsilon, L = L){
    # Step 1 : phi ~ N(0, 1)
    phi <- rnorm(2) %>% as.matrix(., nrow = 2)
    phi_current <- phi
    x_current <- x_int
    # Step 2: leapfrog 
    phi <- phi - epsilon * grad_U(x_int) / 2
    for (i in 1:L) {
      x_int <- x_int + epsilon * phi
      if (i!=L) phi <- phi - epsilon * grad_U(x_int) 
    }
    # Make a half step for momentum at the end.
    phi <- phi - epsilon * grad_U(x_int) / 2
    # Negate momentum at end of L step to make the proposal symmetric
    phi <- -phi
    # Step 3 : Calculating
    r <- (U(x_current) -  U(x_int) + 
            (sum(phi_current^2) / 2) -( sum(phi^2) / 2) ) %>% exp
    # current_U = U(x_current)
    # current_K = sum(phi_current^2) / 2
    # proposed_U = U(x_int)
    # proposed_K = sum(phi^2) / 2
    if (runif(1) < r){
      return (x_int)     # accept
    }else{
      return (x_current)  # reject
    }
  }
  # Generate HMC data 
  HMC_data <- matrix(NA, nrow = N, ncol = 2) %>% 
    `colnames<-`(., c("x1", "x2"))
  HMC_data[1, ] <- x.int
  for (i in 2:N) {
    HMC_data[i, ] <- HMC_fun(x_int = x.int,  # Initial 
                             U = log_post,     # log posterior 
                             grad_U = gradient, # graidnet
                             epsilon = epsilon, L = L)
    x.int <- HMC_data[i, ]
  }
  return(HMC_data)
}
# Random Walker 
binorm.MH_fun <- function(x.int = c(3, -3), N = 3000, 
                          mu, sigma){
  # Define some function
  MH_fun <- function(x, mu, Sigma){
    # Step 1 : draw samples 
    x_start <- rmvnorm(1, mean = x, sigma = diag(2)) %>% as.vector()
    # Step 2 : Calculate r
    r <- dmvnorm(x_start, mean = mu, sigma = Sigma)/dmvnorm(x, mean = mu, sigma = Sigma)
    # Step 3 
    if (runif(1) < r){
      return (x_start)     # accept
    }else{
      return (x)  # reject
    }
  }
  # Genertae MH data
  MH_data <- matrix(NA, nrow = N, ncol = 2) %>% 
    `colnames<-`(., c("x1", "x2"))
  MH_data[1, ] <- x.int
  for (i in 2:N) {
    MH_data[i, ] <- MH_fun(x = x.int, mu = mu, Sigma = sigma)
    x.int <- MH_data[i, ]
  }
  return(MH_data)
}
# Simulation
set.seed(2019)
# HMC 
mu <- c(0, 0)
sigma <- matrix(c(1, 0.8, 0.8, 1), nrow = 2)
HMC_data <- binorm.HMC_fun(x.int = c(3, -3), 
                           mu = mu,
                           sigma = sigma,
                           epsilon = 0.1, L = 10, 
                           N = 1000)
# MH
MH_data <- binorm.MH_fun(x.int = c(3, -3), N = 1000, 
                         mu = mu, sigma = sigma)
# Visualization 
HMC_plot <- HMC_data %>% as.data.frame() %>% .[1:300,] %>% 
  ggplot(., aes(x = x1, y = x2)) + 
  geom_point() + geom_path() + ggtitle("HMC")
MH_plot <- MH_data %>% as.data.frame() %>% .[1:300,] %>% 
  ggplot(., aes(x = x1, y = x2)) + 
  geom_point() + geom_path() + ggtitle("Random-Walk")

grid.arrange(HMC_plot, MH_plot, ncol = 2)

Note:

HMC vs. Random walk

HMC vs. Random walk

由上數GIF圖可看出,在相同起始值下,HMC的收斂速度較MH快 但因HMC對\((\epsilon,L)\)這兩個參數很敏感,即表這兩個參數的調整會影響到HMC的收斂效果,故Hoffan和Gelman在2013年提供No U-turn Sampler(NUTS)來改進HMC

< ACF >

acf_HMC1 <- acf(HMC_data[, 1], plot = FALSE, 
                lag.max = 20) %>%
  autoplot() +
  labs(title = "HMC", caption = "x1")
acf_HMC2 <- acf(HMC_data[, 2], plot = FALSE, 
                lag.max = 20) %>%
  autoplot() +
  labs(title = "HMC", caption = "x2")
acf_MH1 <- acf(MH_data[, 1], 
               plot = FALSE, lag.max = 20) %>%
  autoplot() + 
  labs(title = "Random-Walk", caption = "x1")
acf_MH2 <- acf(MH_data[, 2], 
               plot = FALSE, lag.max = 20) %>%
  autoplot() + 
  labs(title = "Random-Walk", caption = "x2")

grid.arrange(acf_HMC1, acf_MH1, acf_HMC2, acf_MH2, 
             ncol = 2)

3 Demo

3.1 HW2-2

HW2-data
1 2 3 4 5 6 7 8 9 10
y 0.25 0.5 0.62 0.92 0.98 1 1.07 1.1 1.2 1.48

Set the model is \[ \begin{align} &Y_{i}|\theta \sim Weibull(\alpha=2,|\theta) \\ &\theta \sim \Gamma(\alpha,\beta),\quad informative\ prior \\ \end{align} \]

# Step 1: Stan code
HW2.2_code <- "
data { 
  int <lower = 0> N;
  vector[N] y;
  real alpha;
  real prior_alpha;
  real prior_beta;
}

parameters {
  real <lower = 0> theta;
}

model {
  theta ~ gamma(prior_alpha, prior_beta); // prior
  y ~ weibull(alpha, 1/sqrt(theta));  // likelihood
}

generated quantities {
  real post_pred;  // prediction 
  post_pred = weibull_rng(alpha, 1/sqrt(theta));
}
"
# Step 2: Stan dataset 
y <- c(0.25, 0.50, 0.62, 0.92, 0.98, 
       1.00, 1.07, 1.10, 1.20, 1.48)
HW2.2_data <- list(N = length(y),
                   y = y,
                   alpha = 2,
                   prior_alpha = 9, prior_beta = 50)
# Step3: Modeling by stan
time.start <- Sys.time()
fit.hw2.2 <- stan(model_code = HW2.2_code, 
                  data = HW2.2_data, 
                  iter = 5000, chains = 4)
time.stop <- Sys.time()

< Posterior inference >

# Posterior inference
proc_time <- round(time.stop - time.start, 2)
summary(fit.hw2.2)$summary %>% 
  as.matrix() %>% round(., 4) %>% 
  .[, c(1, 3, 6, 9, 10)] %>% 
  kable(., "html") %>% 
  kable_styling(bootstrap_options = "striped", 
                full_width = FALSE) %>% 
  footnote(paste(proc_time, "sec"))
mean sd 50% n_eff Rhat
theta 0.3186 0.0735 0.3121 3926.535 1.0003
post_pred 1.5894 0.8640 1.4734 9205.518 0.9999
lp__ -41.1924 0.7046 -40.9182 4948.175 1.0004
Note:
1.44 sec
# Extract some information 
hw2.2.theta <- rstan::extract(fit.hw2.2)$theta 
hw2.2.pred <- rstan::extract(fit.hw2.2)$post_pred
# Visualizaiton
hw2.2.theta_plot <- ggplot(data = NULL, 
                           aes(x = hw2.2.theta)) + 
  geom_histogram(aes(y = ..density..), 
                 binwidth = 0.03, 
                 fill = 'light blue', 
                 color = 'black') + 
  geom_density() + 
  labs(x = expression(theta), title = "Posterior dist.")

hw2.2.pred_plot <- ggplot(data = NULL, 
                          aes(x = hw2.2.pred)) + 
  geom_histogram(aes(y = ..density..), 
                 binwidth = 0.2, 
                 fill = 'light blue', 
                 color = 'black') + 
  labs(title = "Distribution of prediction") + 
  geom_density() + 
  labs(x = expression(tilde(y)), title = "Prediction dist.")

grid.arrange(hw2.2.theta_plot, hw2.2.pred_plot, ncol = 2)

the answer is same.

3.2 HW2-3

Year Year_index Fatel_accidents Passenger_deaths Death_rate
1976 1 24 734 0.19
1977 2 25 516 0.12
1978 3 31 754 0.15
1979 4 31 877 0.16
1980 5 22 814 0.14
1981 6 21 362 0.06
1982 7 26 764 0.13
1983 8 20 809 0.13
1984 9 16 223 0.03
1985 10 22 1066 0.15

Set the model is
\[ \begin{align} &Y_{t} \sim Poisson(\lambda_{t}),\ t=1,2...10 \\ where,\ &ln(\lambda_{t})=\alpha+\beta*t \\ and,\ &\pi(\alpha,\beta) \propto 1 \end{align} \] where, y = Fatel_accidents; x = Year_index

# Step 1: Stan code
HW2.3_code <- "
data {
  int <lower = 0> N;  // Number of observations 
  int y[N];
  real x[N];
}

parameters {
  // Define parameters to estimate
  real alpha;
  real beta;
}

transformed parameters  {
  real <lower = 0> lambda[N];
  for (i in 1:N) {
    lambda[i] = exp(alpha + beta*x[i]);  // Mean
  }
}

model {
  // If we want to chage the prior
  // alpha ~ normal(3.3742, 0.1153);   // Define prior
  // beta ~ normal(-0.0404, 0.0186);   // Define prior
  y ~ poisson(lambda);
}

generated quantities {
  int <lower = 0> post_pred;  // prediction 
  post_pred = poisson_rng( exp(alpha + 11*beta) );
}
"
# Step 2: Stan dataset 
y <- c(24, 25, 31, 31, 22, 21, 26, 20, 16, 22)
x <- seq(1976, 1985) - 1975
HW2.3_data <- list(N = length(y),
                   y = y, x = x)
# Step3: Modeling by stan
time.start <- Sys.time()
fit.hw2.3 <- stan(model_code = HW2.3_code, 
                  data = HW2.3_data, 
                  iter = 10000, chains = 4)
time.stop <- Sys.time()

# Posterior inference
proc_time <- round(time.stop - time.start, 2)
fit.hw2.3 %>% summary %>% .$summary %>% 
  as.matrix() %>% .[1:2, c(1, 3, 6, 9, 10)] %>% 
  round(., 3) %>% 
  kable(., "html") %>% 
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE) %>% 
  footnote(paste(proc_time, "sec"))
mean sd 50% n_eff Rhat
alpha 3.375 0.134 3.376 5051.611 1.001
beta -0.039 0.023 -0.039 4937.851 1.001
Note:
1.08 sec
# Visualization of contour plot
contour_plot <- rstan::extract(fit.hw2.3)[c("alpha",
                                     "beta")] %>% 
  as.data.frame() %>% 
  ggplot(., aes(x = alpha, y = beta)) + 
  stat_density_2d(aes(fill = ..level..), 
                  geom = "polygon", 
                  colour = "black") + 
  labs(x = "alpha", y = "beta", title = "") +
  scale_fill_gradient(low = "#FFFF00", 
                      high = "#FF3366") +
  labs(x = expression(alpha), y = expression(beta), 
       title = "Joint posterior dist.",
       caption = "Contour plot") +
  theme_grey() + 
  theme(legend.position = "None") 
contour_plot

# Distribution of W = alpha + 11*beta
W_fun <- function(alpha, beta){
  value <- exp(alpha + 11*beta)
  return(value)
}
W <- rstan::extract(fit.hw2.3)[c("alpha","beta")] %>% 
  as.data.frame() %>% 
  apply(., 1, FUN = function(x){
    W_fun(alpha = x[1], beta = x[2])
  }) 
# Visualization 
W.table <- c(W %>% mean, W %>% var) %>% round(., 4) %>% 
  matrix(., ncol = 1) %>% 
  `colnames<-`(., "Value") %>% 
  `rownames<-`(., c("Mean", "Var"))

ggplot(data = NULL, aes(x = W)) + 
  geom_histogram(aes(y = ..density..), 
                 binwidth = 0.5, 
                 fill = 'light blue', 
                 color = 'black') + 
  geom_density(aes(y = ..density..)) + # plot density curve
  labs(x = 'w', title = "Distribution of W",
       caption = "Expected number of fatal accidents in 1986") +
  theme_gray() + 
  annotation_custom(tableGrob(W.table), 
                    xmin = 24, xmax = 27, 
                    ymin = 0.1, ymax = 0.13)

# Prediction 
y.pred <- rstan::extract(fit.hw2.3)$post_pred
# Information of predictive dist.
y.info.table <- c(y.pred %>% sort() %>% .[250], 
                  y.pred %>% sort() %>% .[9750]) %>% 
  matrix(., nrow = 1) %>% 
  `rownames<-`(., "Value") %>% 
  `colnames<-`(., c("95%_LCI", "95%_UCI"))
ggplot(data = NULL, aes(x = y.pred)) +
  geom_histogram(aes(y = ..density..), 
                 binwidth = 1, 
                 fill = 'light blue', color = 'black') +
  geom_density() + 
  labs(title = bquote("Distribution of " ~ y[t==11] ),
       caption = "The number of fatal accidents in 1986", 
       x = expression(y[t==11])) + 
  annotation_custom(tableGrob(y.info.table), 
                    xmin = 30, xmax = 32, 
                    ymin = 0.075, ymax = 0.075) + 
  theme_gray()

the answer is same.

3.3 HW3-1

Results of 22 clinical trials of beta-blockers for reducing mortality after myocardial infarction
study \(y_{0j}\) \(n_{0j}\) \(y_{1j}\) \(n_{1j}\) \(y_{j}\) \(\sigma_{j}\)
1 3 39 3 38 0.0282 0.8503
2 14 116 7 114 -0.7410 0.4832
3 11 93 5 69 -0.5406 0.5646
4 127 1520 102 1533 -0.2461 0.1382
5 27 365 28 355 0.0695 0.2807
6 6 52 4 59 -0.5842 0.6757

Set the hierarchical model is \[ \begin{align} \left\{ \begin{aligned} & y_{j}|\theta_{j} \sim N(\theta_{j},\ \sigma_{j}^{2}) \\ & \theta_{j}|(\mu,\tau) \sim N(\mu,\ \tau^{2}) \\ & p(\mu,\ \tau) = p(\mu|\tau)*p(\tau) \propto p(\tau) \propto 1 \end{aligned} \right. \end{align} \]

# Step 1: Stan code 
HW3.1_code <- "
data {
  int <lower = 0> N;
  vector[N] y;
  real <lower = 0> sigma[N];
}

parameters {
  real mu;
  real tau;
  real theta[N];
}

model {
  theta ~ normal(mu, tau);    // prior
  y ~ normal(theta, sigma);   // likelihood
}
"
# Step 2: Stan dataset 
HW3.1_data <- list(N = 22, 
                   y = data.p1$y_j,
                   sigma = data.p1$sigma_j)
# Step 3: Modeling by stan
time.start <- Sys.time()
fit.hw3.1 <- stan(model_code = HW3.1_code, 
                  data = HW3.1_data, 
                  iter = 5000, chains = 4)
time.stop <- Sys.time()

# Posterior inference
proc_time <- round(time.stop - time.start, 2)
fit.hw3.1 %>% summary() %>% .$summary %>% 
  round(., 3) %>% as.matrix() %>% 
  .[1:4, c(1, 3, 6, 9, 10)] %>% 
  kable(., "html") %>% 
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE) %>% 
  footnote(paste(proc_time, "sec"))
mean sd 50% n_eff Rhat
mu -0.245 0.067 -0.245 953.607 1.004
tau 0.143 0.077 0.133 388.401 1.008
theta[1] -0.237 0.168 -0.243 5335.010 1.000
theta[2] -0.292 0.163 -0.279 3447.232 1.001
Note:
1.02 sec
# Visualization  
poster.median <- fit.hw3.1 %>% summary() %>%
  .$summary %>% .[-c(1, 2, 25), 6] 

pooled.mean <- fit.hw3.1 %>% summary() %>% 
  .$summary %>% .[-c(1, 2, 25), 1] %>% mean

ggplot(data = NULL, 
       aes(x = data.p1$y_j, y = poster.median)) + 
  geom_point(aes(colour = (data.p1$n0 + data.p1$n1) ),
             size = 3) + 
  labs(x = expression(y[j]),
       y = "posterior median", 
       color = "Sample size") + 
  geom_text_repel(aes(x = data.p1$y_j, 
                      y = poster.median, 
                      label =  seq(1, 22))) +
  geom_hline(yintercept = pooled.mean, 
             col = "red", 
             linetype = "dashed")  +
  scale_color_continuous(low = "lightblue", 
                         high = "#003366") + 
  theme_grey(base_size = 13)

3.4 HW3-2

Data
1 2 3 4 5 6 7 8 9 10
\(y_{j1}\) 1.47 1.75 0.66 5.59 3.70 0.81 0.95 2.24 0.25 2.71
\(y_{j2}\) 0.25 5.58 0.61 0.54 8.42 0.05 1.21 2.03 2.82 0.62

we can set the hierarchical model \[ \begin{align} \left\{ \begin{aligned} & y_{j}|\theta_{j} \sim Exp(\theta_{j}) \\ & \theta_{j}|(\alpha,\beta) \sim Inv-gamma(\alpha,\ \beta) \\ & p(\alpha,\ \beta) \propto \beta^{-5/2} \end{aligned} \right. \end{align} \]

# Step1 : Stab code
HW3_code <-"
data {
  int<lower = 0> N_col;
  int<lower = 0> N_row;
  matrix[N_row, N_col] Y;
}
parameters {
  vector<lower = 0>[N_col] Theta;
  real<lower = 0, upper = 50> Alpha;
  real<lower = 0.000001> Beta;
}
model{
  Alpha ~ uniform(0, 50);
  Beta ~ pareto(0.000001, 1.5); // 0.001 and 0.000001
  //  target += -2.5 * log(Beta);

  for(i in 1 : N_col){
    Theta[i] ~ inv_gamma(Alpha, Beta);
  }

  for(i in 1 : N_row){
    for(j in 1 : N_col){
      Y[i, j] ~ exponential(Theta[j]);
    }
  }
}"
# Step 2 : Stan data
Y <- matrix(c(1.47, 1.75, 0.66, 5.59, 3.70,
              0.81, 0.95, 2.24, 0.25, 2.71,
              0.25, 5.58, 0.61, 0.54, 8.42,
              0.05, 1.21, 2.03, 2.82, 0.62), 
            byrow = T, nrow = 2)
HW3_data <- list(Y = Y, 
                  N_col = ncol(Y), 
                  N_row = nrow(Y))
# Step 3: Modeling by stan
time.start <- Sys.time()
fit.hw3.2_80 <- stan(model_code = HW3_code,
                     data = HW3_data, 
                     chains = 1, iter = 11000,
                     warmup = 1000, seed = 2020,
                     refresh = 11000, 
                     control = list(adapt_delta = .80)) # 2019/2018
fit.hw3.2_85 <- stan(model_code = HW3_code,
                     data = HW3_data, 
                     chains = 1, iter = 11000,
                     warmup = 1000, seed = 2010,
                     refresh = 11000, 
                     control = list(adapt_delta = .85))
fit.hw3.2_99 <- stan(model_code = HW3_code,
                     data = HW3_data, 
                     chains = 1, iter = 11000,
                     warmup = 1000, seed = 2019,
                     refresh = 11000, 
                     control = list(adapt_delta = .99))
time.stop <- Sys.time()

time.stop - time.start
## Time difference of 1.2196 mins
# Visualization of alpha and beta 
pairs(fit.hw3.2_80, pars = c('Alpha', ' Beta'))

pairs(fit.hw3.2_85, pars = c('Alpha', ' Beta'))

pairs(fit.hw3.2_99, pars = c('Alpha', ' Beta'))

Adapted_Delta <- c(80, 85, 99)
temp_Result_Mean <- rstan::summary(fit.hw3.2_80)$summary %>%
  as.data.frame(.) %>% select('mean') %>%
  cbind(summary(fit.hw3.2_85)$summary %>% as.data.frame(.) %>%
          select('mean')) %>%
  cbind(summary(fit.hw3.2_99)$summary %>% as.data.frame(.) %>%
          select('mean'))

temp_Result_Mean %<>% set_colnames(paste0('Adapted_Delta_',
                                          Adapted_Delta))


temp_Result_Rhat <- rstan::summary(fit.hw3.2_80)$summary %>%
  as.data.frame(.) %>% select('Rhat') %>%
  cbind(summary(fit.hw3.2_85)$summary %>% as.data.frame(.) %>%
          select('Rhat')) %>%
  cbind(summary(fit.hw3.2_99)$summary %>% as.data.frame(.) %>%
          select('Rhat'))

temp_Result_Rhat %<>% set_colnames(paste0('Adapted_Delta_',
                                          Adapted_Delta))


temp_Result_n_eff <- summary(fit.hw3.2_80)$summary %>%
  as.data.frame(.) %>% select('n_eff') %>%
  cbind(summary(fit.hw3.2_85)$summary %>% as.data.frame(.) %>%
          select('n_eff')) %>%
  cbind(summary(fit.hw3.2_99)$summary %>% as.data.frame(.) %>%
          select('n_eff'))
step_scan <- c(get_sampler_params(fit.hw3.2_80, 
                                  inc_warmup=FALSE)[[1]][, 'stepsize__'][1], 
               get_sampler_params(fit.hw3.2_85,
                                  inc_warmup=FALSE)[[1]][, 'stepsize__'][1], 
               get_sampler_params(fit.hw3.2_99,
                                  inc_warmup=FALSE)[[1]][, 'stepsize__'][1])


temp_Adapted_Step <- data.frame(X = Adapted_Delta, Y = step_scan)

g1 <- temp_Adapted_Step %>% ggplot(aes(x = X, y = Y)) +
  geom_path(size = 1) + 
  geom_point(data = temp_Adapted_Step, aes(x = X, y =  Y)) + 
  labs(x ='Adapted_Delta', y = "Step_Size")
g1

remove(g1)
Div_scan <- c(get_sampler_params(fit.hw3.2_80,
                                 inc_warmup=FALSE)[[1]][, 'divergent__'] %>% sum(.), 
              get_sampler_params(fit.hw3.2_85,
                                 inc_warmup=FALSE)[[1]][, 'divergent__'] %>% sum(.), 
              get_sampler_params(fit.hw3.2_99,
                                 inc_warmup=FALSE)[[1]][, 'divergent__'] %>% sum(.))

temp_Div_Step <- data.frame(X = Adapted_Delta, Y = Div_scan)

g1 <- temp_Div_Step %>% ggplot(aes(x = X, y = Y)) + 
  geom_path(size = 1) + 
  geom_point(data = temp_Div_Step, aes(x = X, y =  Y)) + 
  labs(x = 'Adapted_Delta', y = 'Number of Divergent')
g1

# ggsave(filename = 'Adapted_Div_1.png', plot = g1, dpi = 300)
remove(g1)
temp_Parameter_List <- vector('list', length = 3) %>%
  setNames(paste(Adapted_Delta))
temp_Parameter_List[[1]] <- rstan::extract(fit.hw3.2_80) %>%
  as.data.frame(.)
temp_Parameter_List[[2]] <- rstan::extract(fit.hw3.2_85) %>%
  as.data.frame(.)
temp_Parameter_List[[3]] <- rstan::extract(fit.hw3.2_99) %>% 
  as.data.frame(.)
temp_Name <- temp_Parameter_List %>% names(.)
l <- 1
temp <- temp_Parameter_List %>% lapply(function(x){
  temp_ <- x %>% select('Alpha', 'Beta') %>% 
    mutate(Class = temp_Name[l])
  l <<- l + 1
  return(temp_)
})

temp <- do.call('rbind', temp) %>% as.data.frame(.)
remove(temp_Name, l)
g1 <- temp %>% subset(Class %in% c('80', '99')) %>% 
  ggplot(aes(x = Alpha, y = Beta, 
             color = Class, shape = Class)) + 
  geom_point(size = 2) + 
  labs(title = '80 V.S 99') + xlim(c(0, 50)) + ylim(c(0, 50))
g2 <- temp %>% subset(Class %in% c('85', '99')) %>% 
  ggplot(aes(x = Alpha, y = Beta, 
             color = Class, shape = Class)) + 
  geom_point(size = 2) + 
  labs(title = '85 V.S 99') + xlim(c(0, 50)) + ylim(c(0, 50))
grid.arrange(g1, g2)

remove(g1, g2)
temp <- temp_Parameter_List %>% lapply(function(x){
  temp_ <- sapply(1 : 1000, function(y) mean(x[1 : (10 * y), 'Alpha'])) %>% as.data.frame(.) %>% 
    mutate(Iter = sapply(1 : 1000, function(x) x * 10)) %>%
    set_colnames(c('Mean', 'Iter'))
  return(temp_)
})

g1 <- temp[[1]] %>% 
  ggplot(aes(x = Iter, y = Mean, color = '80')) + 
  geom_path() + 
  geom_path(data = temp[[2]], aes(x = Iter, y = Mean, color = '85')) + 
  geom_path(data = temp[[3]], aes(x = Iter, y = Mean, color = '99')) + 
  ggtitle('Alpha')

temp <- temp_Parameter_List %>% lapply(function(x){
  temp_ <- sapply(1 : 1000, function(y) mean(x[1 : (10 * y), 'Beta'])) %>% as.data.frame(.) %>% 
    mutate(Iter = sapply(1 : 1000, function(x) x * 10)) %>%
    set_colnames(c('Mean', 'Iter'))
  return(temp_)
})

g2 <- temp[[1]] %>% 
  ggplot(aes(x = Iter, y = Mean, color = '80')) + 
  geom_path() + 
  geom_path(data = temp[[2]], aes(x = Iter, y = Mean, color = '85')) + 
  geom_path(data = temp[[3]], aes(x = Iter, y = Mean, color = '99'))  + 
  ggtitle('Beta')
remove(temp)
grid.arrange(g1, g2)

3.5 HW4

ecoli
year week cases
2001 1 5
2001 2 7
2001 3 17
2001 4 18
2001 5 10
2001 6 8
Note:
dim() = 646 * 3

we can set the hierarchical model \[ \begin{align} Y_{t} &\sim Poi(\lambda_{t}),\ t = 1,2...n \\ log(\lambda_{t}) &= \alpha + \beta x_{t} + \eta_{t} \\ \eta_{t} &= \phi\eta_{t-1} + a_{t},\quad a_{t}\sim N(0, \sigma^{2}_{a}) \\ \theta &= (\alpha, \beta, \phi, \sigma^{2}_{a}) \\ where,\ Set\ &\left\{ \begin{aligned} \alpha &\sim N(0, \tau^{2}) \\ \beta &\sim N(0, \tau^{2}I_{4}) \\ \phi &\sim U(-1,1) \\ \sigma^{2}_{a} &\sim Inv-gamma(a,b) \end{aligned} \right.\ and\quad \pi(\theta) = \pi(\alpha) \pi(\beta) \pi(\phi) \pi(\sigma^{2}_{a}),\quad Where,\ \beta = (\beta_{1},\beta_{2}, \beta_{3}, \beta_{4}) \end{align} \]

# Step1 : Stab code
HW4_code <-"
data {
  int<lower = 0> t;
  int<lower = 0> P;
  matrix[t, P] X;
  int Yt[t];
}

parameters {
  real<lower = -1, upper = 1> phi;
  real<lower = 0> Sigma_Squared;
  vector[P] Beta;
  vector[t] Eta;
}

transformed parameters {
  //vector<lower = 0>[t] Lambda;
  real<lower = 0> Sigma;

  //for(i in 1 : t) Lambda[i] = exp(row(X, i) * Beta + Eta[i]);
  Sigma = sqrt(Sigma_Squared);
}

model {
  
  phi ~ uniform(-1, 1);
  Sigma_Squared ~ inv_gamma(3, 40);
  for(i in 1 : P) Beta[i] ~ normal(0, 10);
  for(j in 1 : t){
    if (j == 1)
      Eta[j] ~ normal(0, Sigma);
    else
      Eta[j] ~ normal(phi * Eta[j - 1], Sigma);
  }
  
  for(i in 1 : t){
    Yt[i] ~ poisson_log(row(X, i) * Beta + Eta[i]);
  }
}"
setwd("/Users/linweixiang/R/Bayesian Analysis/Final project")
# Step 2 : Stan data
ecoli <- tscount::ecoli %>% 
  mutate(c52 = cos(2 * pi * `week` / 52),
         s52 = sin(2 * pi * `week`) / 52,
         c26 = cos(2 * pi * `week`) / 26,
         s26 = sin(2 * pi * `week`) / 26)

Y <- ecoli$cases
X <- matrix(1, ncol = 1, nrow = nrow(ecoli)) %>% 
  cbind(ecoli[4 : 7])
HW4_data <- list(t = length(Y), 
                 P = ncol(X), 
                 X = X, 
                 Yt = Y)
# Step 3: Modeling by stan
time.start <- Sys.time()
fit.hw4 <- stan(model_code = HW4_code,
                data = HW4_data,
                iter = 5000, chains = 4)
time.stop <- Sys.time()

In bayesian analysis, MCMC is very slow, so in rstan we can use rstanmulticore::pstan() function to speed up.

time.stop - time.start
## Time difference of 19.76387 mins
# Posterior inference
aa <- Sys.time()
fit.hw4 %>% summary() %>% .$summary %>% 
  round(., 3) %>% as.matrix() %>% 
  .[1:7, c(1, 3, 6, 9, 10)] %>% 
  kable(., "html") %>% 
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE)
mean sd 50% n_eff Rhat
phi 0.527 0.055 0.528 8350.705 1.001
Sigma_Squared 0.256 0.016 0.255 10603.874 1.000
Beta[1] 2.911 0.390 2.909 6982.835 1.000
Beta[2] -0.080 0.060 -0.080 516.175 1.005
Beta[3] 0.029 10.025 0.020 19870.810 1.000
Beta[4] 0.158 10.081 0.210 7668.636 1.000
Beta[5] -0.020 10.073 0.027 19223.655 1.000
bb <- Sys.time( )
bb - aa
## Time difference of 3.447408 secs
# Visualization of ACF 
title_name <- c(expression(alpha), expression(beta[1]), 
                expression(beta[2]), expression(beta[3]), 
                expression(beta[4]))
for (i in 1:5) {
  name <- paste("g", i, sep = "_")
  g <- acf(extract(fit.hw4)$Beta[, i], 
           type = "correlation", 
           plot = FALSE, lag.max = 20) %>% 
    autoplot() + 
    labs(title = title_name[i])
  assign(name, g)
}
grid.arrange(g_1, g_2, g_3, g_4, g_5)

Above this figure, By the acf we can say these samples are independent.

# Visualization 
eta_mean <- rstan::summary(fit.hw4)$summary[8:653, 1] 
ggplot(data = NULL, 
       aes(x = seq(eta_mean), y = eta_mean)) + 
  geom_line() + 
  labs(x = expression(eta[t]), 
       y = "mean") + 
  geom_hline(yintercept = eta_mean %>% mean, 
             color = "red", 
             linetype = "dashed")