Chapter 1

Below is a complete, ready-to-use R script you can apply to any dataset related to the examples in your attached Bayesian Statistics document. Since the PDF does not contain raw data, I provide three full R code templates (Binomial, Normal, Poisson) corresponding to the three core examples:

Stroke Study (Binomial – Beta/Binomial)

Dietary Study (Normal – Normal/Normal)

Caries Study (Poisson – Gamma/Poisson)

For each example, you get:

✅ Data simulation template (replace with your real data)
✅ Summary statistics
✅ Bayesian posterior computation
✅ Visualizations (histograms, likelihood, prior, posterior)
✅ Step-by-step explanation (notes)

1. BINOMIAL–BETA CASE (Stroke SICH Incidence Example)

### ---------------------------------------------
### BINOMIAL – BETA POSTERIOR (Stroke Example)
### ---------------------------------------------

# Example data (replace with your actual numbers)
n  <- 50        # sample size
y  <- 10        # number of SICH cases

# Summary statistics
prop_est <- y / n
prop_est
## [1] 0.2
# Plot: Likelihood for Binomial
theta <- seq(0, 0.5, length = 1000)
likelihood <- dbinom(y, n, theta)

plot(theta, likelihood, type="l", lwd=2,
     xlab="Theta (SICH probability)", ylab="Likelihood",
     main="Binomial Likelihood")

# Prior: Beta(9,93) from ECASS-2 study
alpha0 <- 9
beta0  <- 93

# Posterior parameters
alpha_post <- alpha0 + y
beta_post  <- beta0 + (n - y)

alpha_post; beta_post
## [1] 19
## [1] 133
# Plot Prior vs Likelihood vs Posterior
prior    <- dbeta(theta, alpha0, beta0)
posterior <- dbeta(theta, alpha_post, beta_post)

plot(theta, prior, type="l", col="blue", lwd=2, ylim=c(0, max(posterior)),
     ylab="Density", xlab="Theta", main="Beta Prior, Likelihood, Posterior")
lines(theta, posterior, col="red", lwd=3)
legend("topright", legend=c("Prior Beta(9,93)", "Posterior Beta(19,133)"),
       col=c("blue", "red"), lwd=2)

Notes

Use this when your response is binary (success/failure).

Posterior is:

θ∣y∼Beta(α+y,β+n−y)

This plot shows visually how the posterior is pulled (shrunk) away from the likelihood toward the prior.
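As a quick numerical check of that shrinkage, the posterior mean can be written as a weighted average of the prior mean and the sample proportion; a minimal sketch reusing alpha0, beta0, n and y from the code above:

w <- (alpha0 + beta0) / (alpha0 + beta0 + n)          # weight given to the prior
w * (alpha0 / (alpha0 + beta0)) + (1 - w) * (y / n)   # equals (alpha0 + y) / (alpha0 + beta0 + n)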

2. NORMAL–NORMAL CASE (Cholesterol Intake Example)

### ---------------------------------------------
### NORMAL – NORMAL POSTERIOR (Dietary Study)
### ---------------------------------------------

# Replace with your own data
chol <- rnorm(50, mean=318, sd=119.5)   # Example dataset
summary(chol)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   133.3   271.9   356.2   358.6   419.6   665.4
# Sample parameters
ybar <- mean(chol)
s    <- sd(chol)
n    <- length(chol)

# Prior from IBBENS study (mean=328, SE=5.072)
mu0 <- 328
sigma0 <- 5.072

# Posterior mean and variance
post_mean <- ( (1/sigma0^2)*mu0 + (n/s^2)*ybar )  /  ( (1/sigma0^2) + n/s^2 )
post_var  <- 1 / ( (1/sigma0^2) + n/s^2 )
post_sd   <- sqrt(post_var)

post_mean; post_sd
## [1] 330.4935
## [1] 4.860603
# Plot Prior, Likelihood, Posterior
mu <- seq(250, 400, length=200)

prior      <- dnorm(mu, mu0, sigma0)
likelihood <- dnorm(mu, ybar, s/sqrt(n))
posterior  <- dnorm(mu, post_mean, post_sd)

plot(mu, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, likelihood, posterior)),
     main="Normal Prior, Likelihood, Posterior",
     xlab="Mean Cholesterol", ylab="Density")
lines(mu, likelihood, col="green", lwd=2)
lines(mu, posterior, col="red", lwd=2)

legend("topright", legend=c("Prior", "Likelihood", "Posterior"),
       col=c("blue","green","red"), lwd=2)

Notes

This is suitable for continuous data with known (or large-sample) variance.

Posterior is:

μ∣y ∼ N(μ_n, σ_n²),  with  μ_n = (μ0/σ0² + n·ȳ/s²) / (1/σ0² + n/s²)  and  σ_n² = 1 / (1/σ0² + n/s²)
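The posterior mean is a precision-weighted average of the prior mean and the sample mean, and an equal-tail credible interval follows from qnorm; a minimal check reusing mu0, sigma0, n, s, ybar, post_mean and post_sd from the code above:

w <- (1 / sigma0^2) / (1 / sigma0^2 + n / s^2)   # weight given to the prior mean
w * mu0 + (1 - w) * ybar                         # reproduces post_mean
qnorm(c(0.025, 0.975), post_mean, post_sd)       # 95% credible interval for the mean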

3. POISSON–GAMMA CASE (Caries Example)

### ---------------------------------------------
### POISSON – GAMMA POSTERIOR (Caries Study)
### ---------------------------------------------

# Replace with your actual dmft data
dmft <- rpois(4351, lambda = 2.24)   # Example data

summary(dmft)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   2.000   2.259   3.000   9.000
mean(dmft)
## [1] 2.259251
# Sufficient statistics
y_sum <- sum(dmft)
n     <- length(dmft)

# Prior Gamma(3,1)
alpha0 <- 3
beta0  <- 1

# Posterior parameters
alpha_post <- alpha0 + y_sum
beta_post  <- beta0 + n

alpha_post; beta_post
## [1] 9833
## [1] 4352
# Plot Prior and Posterior
theta <- seq(0, 8, length=500)
prior <- dgamma(theta, alpha0, beta0)
posterior <- dgamma(theta, alpha_post, beta_post)

plot(theta, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, posterior)),
     main="Gamma Prior and Posterior",
     xlab="Mean dmft", ylab="Density")
lines(theta, posterior, col="red", lwd=2)
legend("topright", c("Prior Gamma(3,1)", "Posterior"),
       col=c("blue","red"), lwd=2)

Notes

For count data where outcome ~ Poisson(θ).

Posterior is:

θ∣y∼Gamma(α+∑yi, β+n)
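As with the other conjugate models, the posterior mean is a weighted average of the prior mean α0/β0 and the sample mean; a minimal check reusing dmft, alpha0, beta0, alpha_post and beta_post from the code above:

w <- beta0 / (beta0 + length(dmft))              # weight given to the prior mean
w * (alpha0 / beta0) + (1 - w) * mean(dmft)      # equals (alpha0 + sum(dmft)) / (beta0 + n)
qgamma(c(0.025, 0.975), alpha_post, beta_post)   # 95% credible interval for the mean dmft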

Chapter 2

1. BINOMIAL–BETA MODEL
############################################################
# BINOMIAL–BETA POSTERIOR (Stroke Example)
# Model:     Y ~ Binomial(n, θ),  θ ~ Beta(α0, β0)
# Posterior: θ | y ~ Beta(α0 + y, β0 + n - y)
#
# Summary measures:
#   Mean     = α_post / (α_post + β_post)
#   Mode     = (α_post - 1) / (α_post + β_post - 2)
#   Variance = α_post * β_post / [(α_post + β_post)^2 * (α_post + β_post + 1)]
#   SD       = sqrt(Variance)
#   Median   = qbeta(0.5, α_post, β_post)
#   95% credible interval = qbeta(c(0.025, 0.975), α_post, β_post)
############################################################

n <- 50        # sample size
y <- 10        # number of successes (SICH cases)

Prior Hyperparameters

alpha0 <- 9
beta0 <- 93
Posterior Hyperparameters
alpha_post <- alpha0 + y
beta_post <- beta0 + (n - y)
Posterior Summary Measures
post_mean <- alpha_post / (alpha_post + beta_post)
post_mode <- (alpha_post - 1) / (alpha_post + beta_post - 2)
post_var <- (alpha_post * beta_post) /
((alpha_post + beta_post)^2 * (alpha_post + beta_post + 1))
post_sd <- sqrt(post_var)
post_med <- qbeta(0.5, alpha_post, beta_post)
post_CI <- qbeta(c(0.025, 0.975), alpha_post, beta_post)

# Print Summary

list(
posterior_mean = post_mean,
posterior_mode = post_mode,
posterior_sd = post_sd,
median = post_med,
credible_interval_95 = post_CI
)
## $posterior_mean
## [1] 0.125
## 
## $posterior_mode
## [1] 0.12
## 
## $posterior_sd
## [1] 0.02673704
## 
## $median
## [1] 0.1233544
## 
## $credible_interval_95
## [1] 0.07748574 0.18184089
Plot Prior, Likelihood, Posterior
theta <- seq(0, 0.5, length = 2000)
Likelihood scaled to match density plot
likelihood_raw <- dbinom(y, n, theta)
likelihood_scaled <- likelihood_raw / max(likelihood_raw)

prior <- dbeta(theta, alpha0, beta0)
posterior <- dbeta(theta, alpha_post, beta_post)

plot(theta, prior, type="l", col="blue", lwd=2,
     ylim=c(0, max(prior, posterior)),
     xlab="Theta", ylab="Density",
     main="Binomial–Beta: Prior, Likelihood, Posterior")
lines(theta, likelihood_scaled * max(prior), col="darkgreen", lwd=2, lty=2)
lines(theta, posterior, col="red", lwd=3)
legend("topright",
legend=c("Prior Beta(9,93)", "Scaled Likelihood",
paste0("Posterior Beta(",alpha_post,",",beta_post,")")),
col=c("blue","darkgreen","red"), lwd=2, lty=c(1,2,1))

2. POISSON–GAMMA MODEL

POISSON–GAMMA POSTERIOR (Caries Example)

Model:     Y_i ~ Poisson(λ),  λ ~ Gamma(α0, β0)
Posterior: λ | y ~ Gamma(α0 + Σ y_i, β0 + n)

Summary measures:
Mean = α_post / β_post
Variance = α_post / β_post^2
SD = sqrt(Variance)
Median = qgamma(0.5, α_post, β_post)
95% credible interval = qgamma(c(0.025, 0.975), α_post, β_post)

# Example Caries Data from Chapter

y <- c(3,2,4)
n <- length(y)
sum_y <- sum(y)

# Prior

alpha0 <- 2
beta0 <- 1

# Posterior

alpha_post <- alpha0 + sum_y
beta_post <- beta0 + n

# Summary Measures

post_mean <- alpha_post / beta_post
post_var <- alpha_post / (beta_post^2)
post_sd <- sqrt(post_var)
post_med <- qgamma(0.5, alpha_post, beta_post)
post_CI <- qgamma(c(0.025,0.975), alpha_post, beta_post)

# Print results

list(
posterior_mean = post_mean,
posterior_sd = post_sd,
median = post_med,
credible_interval_95 = post_CI
)
## $posterior_mean
## [1] 2.75
## 
## $posterior_sd
## [1] 0.8291562
## 
## $median
## [1] 2.667131
## 
## $credible_interval_95
## [1] 1.372790 4.597589
# Plot

theta <- seq(0,6, length=1000)
prior <- dgamma(theta, alpha0, beta0)
post <- dgamma(theta, alpha_post, beta_post)

plot(theta, prior, type="l", col="blue", lwd=2,
     ylim=c(0, max(prior, post)),
     main="Poisson–Gamma: Prior vs Posterior",
     xlab="Mean dmft", ylab="Density")
lines(theta, post, col="red", lwd=2)
legend("topright",
legend=c("Prior Gamma(2,1)",
paste0("Posterior Gamma(",alpha_post,",",beta_post,")")),
col=c("blue","red"), lwd=2)

3. NORMAL–NORMAL MODEL

NORMAL–NORMAL POSTERIOR (Dietary Example)

Model:     y_i ~ Normal(μ, σ^2),  μ ~ Normal(μ0, σ0^2)

Posterior: μ | y ~ Normal(μ_n, σ_n^2)

Formulas:
μ_n   = (μ0/σ0^2 + n·ȳ/σ^2) / (1/σ0^2 + n/σ^2)
σ_n^2 = 1 / (1/σ0^2 + n/σ^2)

# Example dietary data

set.seed(99)
chol <- rnorm(50, mean=328, sd=20)

n <- length(chol)
ybar <- mean(chol)
s2 <- var(chol)

# Prior from chapter

mu0 <- 328
sigma0 <- 5.072

# Posterior

post_mean <- (mu0/sigma0^2 + n*ybar/s2) / (1/sigma0^2 + n/s2)
post_sd <- sqrt(1 / (1/sigma0^2 + n/s2))

CI_95 <- c(post_mean - 1.96*post_sd, post_mean + 1.96*post_sd)

list(
posterior_mean = post_mean,
posterior_sd = post_sd,
credible_interval_95 = CI_95
)
## $posterior_mean
## [1] 324.1855
## 
## $posterior_sd
## [1] 2.507436
## 
## $credible_interval_95
## [1] 319.2710 329.1001
# Plot Prior, Likelihood, Posterior

mu <- seq(250,400,length=500)

prior <- dnorm(mu, mu0, sigma0)
lik <- dnorm(mu, ybar, sqrt(s2/n))
post <- dnorm(mu, post_mean, post_sd)

plot(mu, prior, type="l", col="blue", lwd=2,
     ylim=c(0, max(prior, lik, post)),
     xlab="Mean Cholesterol", ylab="Density",
     main="Normal–Normal: Prior, Likelihood & Posterior")
lines(mu, lik, col="darkgreen", lwd=2, lty=2)
lines(mu, post, col="red", lwd=3)
legend("topright", legend=c("Prior", "Likelihood", "Posterior"),
col=c("blue","darkgreen","red"), lwd=2, lty=c(1,2,1))

Chapter 3

1. ONE-SAMPLE BAYESIAN ANALYSIS FOR NORMAL MEAN (μ)
## ------------------------------------------------------------
## Example dataset (replace with your own sample)
## ------------------------------------------------------------
set.seed(123)
y <- rnorm(20, mean = 120, sd = 15)  
n <- length(y)

## Summary statistics
sample_mean <- mean(y)
sample_sd   <- sd(y)
sample_var  <- var(y)
summary(y)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    90.5   112.6   121.8   122.1   128.2   146.8
## Plot: Histogram + sample mean
hist(y, main = "Histogram of the Sample", col="lightgray", breaks=10)
abline(v = sample_mean, col="red", lwd=2)

## ------------------------------------------------------------
## Bayesian Conjugate Prior:
## μ ~ Normal(μ0, τ0²)
## y | μ ~ Normal(μ, σ²)
##
## Posterior:
## μ | y ~ Normal( μn , τn² )
##
## Formulas:
## τn² = 1 / (1/τ0² + n/σ²)
## μn  = τn² * ( μ0/τ0² + n*ȳ/σ² )
## ------------------------------------------------------------

mu0   <- 110     # prior mean
tau0  <- 10      # prior SD
sigma <- sample_sd  # plug-in: treat the sample SD as the known sigma

# Posterior variance and posterior mean
tau_n2 <- 1 / (1/tau0^2 + n/sigma^2)
tau_n  <- sqrt(tau_n2)
mu_n   <- tau_n2 * (mu0/tau0^2 + n*sample_mean/sigma^2)

# Posterior print
list(
  posterior_mean = mu_n,
  posterior_sd   = tau_n,
  CI_95 = c(mu_n - 1.96*tau_n, mu_n + 1.96*tau_n)
)
## $posterior_mean
## [1] 120.9581
## 
## $posterior_sd
## [1] 3.101537
## 
## $CI_95
## [1] 114.8790 127.0371
## Plot Prior vs Likelihood vs Posterior
mu_grid <- seq(sample_mean - 40, sample_mean + 40, length=400)

prior      <- dnorm(mu_grid, mu0, tau0)
likelihood <- dnorm(mu_grid, sample_mean, sigma/sqrt(n))
posterior  <- dnorm(mu_grid, mu_n, tau_n)

plot(mu_grid, prior, type="l", col="blue", lwd=2,
     ylim=c(0, max(prior, likelihood, posterior)),
     main="Prior, Likelihood & Posterior", ylab="Density")
lines(mu_grid, likelihood, col="darkgreen", lwd=2, lty=2)
lines(mu_grid, posterior, col="red", lwd=2)
legend("topright", c("Prior","Likelihood","Posterior"),
       col=c("blue","darkgreen","red"), lwd=2, lty=c(1,2,1))
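Because the posterior for μ is Normal, probability statements are direct; for example, the posterior probability that the mean exceeds the prior mean mu0 = 110 (using mu_n and tau_n from above):

1 - pnorm(110, mean = mu_n, sd = tau_n)   # P(mu > 110 | data)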

2. TWO–SAMPLE BAYESIAN COMPARISON (μ₁ – μ₂)
## ------------------------------------------------------------
## Example data (replace these with your two groups)
## ------------------------------------------------------------
set.seed(321)
group1 <- rnorm(25, mean = 130, sd = 12)
group2 <- rnorm(22, mean = 118, sd = 14)

n1 <- length(group1)
n2 <- length(group2)

mean1 <- mean(group1)
mean2 <- mean(group2)

var1 <- var(group1)
var2 <- var(group2)

## Summary statistics
list(
  group1_mean = mean1,
  group2_mean = mean2,
  difference  = mean1 - mean2,
  group1_sd = sqrt(var1),
  group2_sd = sqrt(var2)
)
## $group1_mean
## [1] 132.8133
## 
## $group2_mean
## [1] 116.0981
## 
## $difference
## [1] 16.71524
## 
## $group1_sd
## [1] 10.39231
## 
## $group2_sd
## [1] 14.37212
## Plot: Side-by-side boxplots
boxplot(group1, group2,
        names=c("Group 1", "Group 2"),
        col=c("lightblue","lightpink"),
        main="Boxplot of Two Groups")

## ------------------------------------------------------------
## Bayesian model:
## μ₁, μ₂ have independent Normal priors
## Posterior for difference:
## (μ₁ – μ₂) | data ~ Normal( mean_diff_post , var_diff_post )
##
## Posterior mean:
## μ1_n = ...
## μ2_n = ...
## diff_post_mean = μ1_n - μ2_n
##
## Posterior variance:
## var_diff_post = τ1_n^2 + τ2_n^2
## ------------------------------------------------------------

# Priors
mu01 <- 125; tau01 <- 10
mu02 <- 125; tau02 <- 10

# "Known" variances taken from sample variances
sigma1 <- sqrt(var1)
sigma2 <- sqrt(var2)

# Posterior variances
tau1_n2 <- 1 / (1/tau01^2 + n1/sigma1^2)
tau2_n2 <- 1 / (1/tau02^2 + n2/sigma2^2)

# Posterior means
mu1_n <- tau1_n2 * (mu01/tau01^2 + n1*mean1/sigma1^2)
mu2_n <- tau2_n2 * (mu02/tau02^2 + n2*mean2/sigma2^2)

# Posterior for difference
diff_mean_post <- mu1_n - mu2_n
diff_var_post  <- tau1_n2 + tau2_n2
diff_sd_post   <- sqrt(diff_var_post)

list(
  posterior_mean_difference = diff_mean_post,
  posterior_sd = diff_sd_post,
  CI_95 = c(diff_mean_post - 1.96*diff_sd_post,
            diff_mean_post + 1.96*diff_sd_post)
)
## $posterior_mean_difference
## [1] 15.62761
## 
## $posterior_sd
## [1] 3.567104
## 
## $CI_95
## [1]  8.63609 22.61914
## Plot posterior of μ₁ – μ₂
dgrid <- seq(diff_mean_post - 40, diff_mean_post + 40, length=500)
post_diff <- dnorm(dgrid, diff_mean_post, diff_sd_post)

plot(dgrid, post_diff, type="l", col="purple", lwd=2,
     main="Posterior Distribution of (μ₁ – μ₂)",
     xlab="Difference in Means", ylab="Posterior Density")
abline(v = diff_mean_post, col="red", lwd=2)
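A common summary for a two-group comparison is the posterior probability that group 1 has the larger mean; under the Normal posterior above this is a one-line computation:

1 - pnorm(0, mean = diff_mean_post, sd = diff_sd_post)   # P(mu1 > mu2 | data)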

3. POSTERIOR PREDICTIVE DISTRIBUTION
## Posterior predictive distribution:
## y_new | data ~ Normal( μ_n , σ² + τ_n² )

post_pred_mean <- mu_n
post_pred_sd   <- sqrt(sigma^2 + tau_n^2)

# Draw 1000 posterior predictive samples
y_new <- rnorm(1000, post_pred_mean, post_pred_sd)

hist(y_new, col="lightgray",
     main="Posterior Predictive Distribution",
     xlab="Predicted Observation")
abline(v = post_pred_mean, col="red", lwd=2)
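A 95% prediction interval for a new observation follows either analytically from the Normal form above or empirically from the simulated draws (the two should roughly agree):

qnorm(c(0.025, 0.975), post_pred_mean, post_pred_sd)   # analytic predictive interval
quantile(y_new, c(0.025, 0.975))                       # interval from the 1000 simulated draws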

# 1. One-sample Normal model — analytic posterior and composition sampling

Simulate example data (replace with your own vector y)
n <- 30

# Simulate sample from N(120, 15^2)

y <- rnorm(n, mean = 120, sd = 15)

# Basic sample summaries

# sample mean: \bar{y} = (1/n) \sum y_i

ybar <- mean(y)      # sample mean
s    <- sd(y)        # sample standard deviation (sqrt(sample variance))
s2   <- var(y)       # sample variance

# Print quick summaries

cat("n =", n, "\n")
## n = 30
cat("sample mean =", round(ybar,3), "sample SD =", round(s,3), "\n\n")
## sample mean = 119.773 sample SD = 13.265
# ---------------------------

# Analytic Normal-Normal posterior (treat sigma as known = sample sd)

# ---------------------------

# Prior for mu

mu0  <- 110     # prior mean (choose)
tau0 <- 10      # prior sd (choose)

# Treat sigma (observation SD) as known for conjugacy

sigma <- s      # NOTE: this is an approximation; better to use composition for unknown sigma

# Posterior variance (tau_n^2) and mean (mu_n)

tau_n2 <- 1 / (1/tau0^2 + n / sigma^2)   # posterior variance of mu
tau_n  <- sqrt(tau_n2)                   # posterior sd
mu_n   <- tau_n2 * (mu0 / tau0^2 + n * ybar / sigma^2)  # posterior mean

# Show posterior summary

cat("Conjugate Normal-Normal posterior (approx treating sigma known):\n")
## Conjugate Normal-Normal posterior (approx treating sigma known):
cat(" posterior mean =", round(mu_n,4), "posterior sd =", round(tau_n,4), "\n\n")
##  posterior mean = 119.2317 posterior sd = 2.3537
# ---------------------------

# Composition sampling: unknown sigma (noninformative prior p(mu,sigma^2) \propto sigma^{-2})

# Steps:

# 1) draw sigma^2_k = (n-1)*s^2 / chi2_{n-1}

# 2) draw mu_k ~ Normal(ybar, sigma^2_k / n)

# ---------------------------

K <- 5000           # number of posterior samples
mu_samples <- numeric(K)
sigma2_samples <- numeric(K)

for (k in 1:K) {
  # draw from chi-square with df = n - 1
  chi_val <- rchisq(1, df = n - 1)

  # inverse-chi-square draw for sigma^2 (scale form)
  sigma2_k <- (n - 1) * s2 / chi_val
  sigma2_samples[k] <- sigma2_k

  # conditional draw for mu given sigma^2
  mu_samples[k] <- rnorm(1, mean = ybar, sd = sqrt(sigma2_k / n))
}

# Summaries from composition sampling

# posterior mean of mu, posterior sd, credible interval

mu_post_mean <- mean(mu_samples)
mu_post_sd   <- sd(mu_samples)
mu_post_CI   <- quantile(mu_samples, probs = c(0.025, 0.975))

cat("Composition sampling results for mu:\n")
## Composition sampling results for mu:
cat(" mean =", round(mu_post_mean,4), " sd =", round(mu_post_sd,4),
" 95% CI =", round(mu_post_CI[1],4), "-", round(mu_post_CI[2],4), "\n\n")
##  mean = 119.7804  sd = 2.5561  95% CI = 114.7432 - 124.8056
# Diagnostics: traceplot and density

par(mfrow = c(2,1))
plot(mu_samples, type = "l", main = "Traceplot: mu samples (composition)", ylab = "mu")
hist(mu_samples, breaks = 40, main = "Posterior density of mu (composition)", xlab = "mu")

par(mfrow = c(1,1))

Notes (inline & interpretation):

The composition sampler generates joint draws from the exact posterior under the noninformative prior.

Posterior mean of μ from composition should be close to sample mean. Posterior spread accounts for uncertainty in σ².

Use the composition samples to compute any marginal summaries, HPD, or predictive draws.
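For instance, an HPD interval can be read off the composition draws with a simple shortest-interval search; hpd_from_draws below is an ad hoc helper (not from any package), applied here to the mu_samples vector from above:

hpd_from_draws <- function(draws, prob = 0.95) {
  sorted <- sort(draws)
  k <- floor(prob * length(sorted))          # number of draws inside the interval
  widths <- sorted[(k + 1):length(sorted)] - sorted[1:(length(sorted) - k)]
  i <- which.min(widths)                     # shortest interval covering ~prob of the draws
  c(lower = sorted[i], upper = sorted[i + k])
}
hpd_from_draws(mu_samples)   # compare with quantile(mu_samples, c(0.025, 0.975))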

2. Two-sample comparison: posterior for μ₁ − μ₂

# ---------------------------

# Simulate two independent groups (replace with your real data)

# ---------------------------

set.seed(2026)
group1 <- rnorm(28, mean = 130, sd = 12)   # group 1 data
group2 <- rnorm(24, mean = 118, sd = 14)   # group 2 data

# Basic summaries

n1 <- length(group1); n2 <- length(group2)
ybar1 <- mean(group1); ybar2 <- mean(group2)
s1 <- sd(group1); s2 <- sd(group2)

cat("Group1: n=", n1, " mean=", round(ybar1,3), " sd=", round(s1,3), "\n")
## Group1: n= 28  mean= 128.053  sd= 12.824
cat("Group2: n=", n2, " mean=", round(ybar2,3), " sd=", round(s2,3), "\n\n")
## Group2: n= 24  mean= 119.373  sd= 11.783
# ---------------------------

# Conjugate Normal-Normal approach (approx: treat sigma_j as known = sample sd)

# Priors for mu1 and mu2 (choose weakly informative)

# ---------------------------

mu01 <- 120; tau01 <- 20   # prior mean and sd for mu1
mu02 <- 120; tau02 <- 20   # prior mean and sd for mu2

# Use sample SDs as plug-in observation SD (approximation)

sigma1 <- s1
sigma2 <- s2

# Posterior variance and means for each mu_j

tau1n2 <- 1 / (1 / tau01^2 + n1 / sigma1^2)
tau2n2 <- 1 / (1 / tau02^2 + n2 / sigma2^2)
mu1n   <- tau1n2 * (mu01 / tau01^2 + n1 * ybar1 / sigma1^2)
mu2n   <- tau2n2 * (mu02 / tau02^2 + n2 * ybar2 / sigma2^2)

# Posterior for difference mu1 - mu2

diff_mean <- mu1n - mu2n
diff_var  <- tau1n2 + tau2n2
diff_sd   <- sqrt(diff_var)
diff_CI   <- c(diff_mean - 1.96 * diff_sd, diff_mean + 1.96 * diff_sd)

list(
posterior_mean_diff = diff_mean,
posterior_sd_diff = diff_sd,
CI_95 = diff_CI
)
## $posterior_mean_diff
## [1] 8.553858
## 
## $posterior_sd_diff
## [1] 3.389847
## 
## $CI_95
## [1]  1.909757 15.197959
# Plot posterior density of difference

xgrid <- seq(diff_mean - 4*diff_sd, diff_mean + 4*diff_sd, length.out = 400)
plot(xgrid, dnorm(xgrid, diff_mean, diff_sd), type="l", col="purple", lwd=2,
main="Posterior (approx) of μ1 − μ2", xlab="μ1 − μ2")
abline(v = diff_mean, col="red", lwd=2)

Notes & cautions:

The plug-in approach (using the sample SDs as if known) is an approximation. For full Bayesian inference with unknown σ₁² and σ₂², use composition sampling (draw each σ² from its marginal posterior, then draw μ conditionally) or a hierarchical model.

If you prefer exact composition: sample σ1² ~ Inv-χ²(n1-1, s1²) and σ2² similarly, then sample μ1 and μ2 conditional on each σ²; compute their difference per draw to form the posterior distribution.
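A minimal sketch of that exact composition approach, reusing n1, n2, ybar1, ybar2, s1 and s2 from above (Kd, the number of draws, is arbitrary):

Kd <- 5000
sigma1_2 <- (n1 - 1) * s1^2 / rchisq(Kd, df = n1 - 1)   # draws of sigma1^2
sigma2_2 <- (n2 - 1) * s2^2 / rchisq(Kd, df = n2 - 1)   # draws of sigma2^2
mu1_draws <- rnorm(Kd, ybar1, sqrt(sigma1_2 / n1))      # mu1 | sigma1^2, data
mu2_draws <- rnorm(Kd, ybar2, sqrt(sigma2_2 / n2))      # mu2 | sigma2^2, data
quantile(mu1_draws - mu2_draws, c(0.025, 0.5, 0.975))   # posterior summary of mu1 - mu2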

3. Multinomial–Dirichlet model for categorical counts

# Simulated categorical data (replace with real counts)

counts <- c(180, 41, 216, 64)  # example 2x2 table flattened or counts per cell
n_cat <- sum(counts)

# Prior Dirichlet hyperparameters (uninformative = 1 for each category)

alpha <- rep(1, length(counts))

# Posterior Dirichlet parameters

alpha_post <- alpha + counts

# Posterior means

theta_post_mean <- alpha_post / sum(alpha_post)

list(
counts = counts,
prior_alpha = alpha,
posterior_alpha = alpha_post,
posterior_mean = round(theta_post_mean,4)
)
## $counts
## [1] 180  41 216  64
## 
## $prior_alpha
## [1] 1 1 1 1
## 
## $posterior_alpha
## [1] 181  42 217  65
## 
## $posterior_mean
## [1] 0.3584 0.0832 0.4297 0.1287
# Plot prior vs posterior (barplots)

par(mfrow=c(1,2))
barplot(alpha/sum(alpha), main="Prior mean (Dirichlet)", ylim=c(0,1))
barplot(theta_post_mean, main="Posterior mean (Dirichlet)", ylim=c(0,1))

par(mfrow=c(1,1))
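The posterior means above can be supplemented with credible intervals by sampling from the Dirichlet posterior. A minimal sketch using the Gamma representation of the Dirichlet (no extra packages; Kdir is an arbitrary number of draws):

Kdir <- 4000
g <- matrix(rgamma(Kdir * length(alpha_post), shape = rep(alpha_post, each = Kdir)),
            nrow = Kdir)                                   # column j ~ Gamma(alpha_post[j], 1)
theta_draws <- g / rowSums(g)                              # rows are Dirichlet(alpha_post) draws
apply(theta_draws, 2, quantile, probs = c(0.025, 0.975))   # 95% credible interval per category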
4. Bayesian linear regression via Method of Composition
library(MASS)                # MUST be loaded
## Warning: package 'MASS' was built under R version 4.4.3
# ---------------------------

# Simulate a simple regression dataset (replace with your real X, y)

# ---------------------------

set.seed(2027)
n <- 120
x <- runif(n, 18, 40)                     # predictor (e.g., BMI, age...)
beta0_true <- 0.8
beta1_true <- 0.04
sigma_true <- 0.28
y_reg <- beta0_true + beta1_true * x + rnorm(n, sd = sigma_true)

# Build design matrix with intercept

X <- cbind(1, x)
d <- ncol(X) - 1   # number of predictors excluding intercept

# Frequentist OLS estimates (for posterior center)

XtX_inv <- solve(t(X) %*% X)
beta_hat <- XtX_inv %*% t(X) %*% y_reg
resid <- y_reg - X %*% beta_hat
s2 <- sum(resid^2) / (n - d - 1)   # residual variance estimator (s^2)

# Print OLS summaries

cat("OLS beta_hat:\n"); print(round(beta_hat,4))
## OLS beta_hat:
##     [,1]
##   0.6623
## x 0.0441
cat("Residual s^2 =", round(s2,5), "\n\n")
## Residual s^2 = 0.06706
# ---------------------------

# Composition sampling

# ---------------------------

K <- 4000
sigma2_samples_reg <- numeric(K)
beta_samples <- matrix(NA, nrow = K, ncol = length(beta_hat))

nu <- n - d - 1  # degrees of freedom for Inv-chi-square

for (k in 1:K) {
  # sample sigma^2 from Inv-chi-square(n - d - 1, s2)
  chi_val <- rchisq(1, df = nu)
  sigma2_k <- nu * s2 / chi_val
  sigma2_samples_reg[k] <- sigma2_k

  # conditional sample for beta ~ N(beta_hat, sigma2_k * (X'X)^{-1})
  beta_samples[k, ] <- mvrnorm(1, mu = as.vector(beta_hat), Sigma = sigma2_k * XtX_inv)
}

# Posterior summaries for beta

beta_post_mean <- colMeans(beta_samples)
beta_post_sd   <- apply(beta_samples, 2, sd)

names(beta_post_mean) <- c("beta0","beta1")
list(
posterior_mean_beta = round(beta_post_mean,4),
posterior_sd_beta   = round(beta_post_sd,4)
)
## $posterior_mean_beta
##  beta0  beta1 
## 0.6613 0.0441 
## 
## $posterior_sd_beta
## [1] 0.1151 0.0038
# Joint posterior plot: scatter of beta0 vs beta1

plot(beta_samples[,1], beta_samples[,2], pch=16, cex=0.4,
main="Joint posterior samples: beta0 vs beta1",
xlab="beta0", ylab="beta1")
abline(v = beta_hat[1], col="blue", lwd=2)  # frequentist center
abline(h = beta_hat[2], col="blue", lwd=2)
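Credible intervals and sign probabilities for the coefficients come straight from the composition draws; for example, for the slope:

quantile(beta_samples[, 2], c(0.025, 0.975))   # 95% credible interval for beta1
mean(beta_samples[, 2] > 0)                    # posterior probability that the slope is positive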

5. Posterior predictive distributions
# 5.1 One-sample predictive using composition draws (from above mu_samples, sigma2_samples)

# Draw 1000 predictive observations using the K composition draws for the one-sample example

Kpp <- 1000
idx <- sample(seq_along(mu_samples), size = Kpp, replace = TRUE)  # resample indices from the composition draws
y_pred_samples <- rnorm(Kpp, mean = mu_samples[idx], sd = sqrt(sigma2_samples[idx]))

# Plot predictive histogram

hist(y_pred_samples, breaks = 30, main = "Posterior predictive (one-sample)", xlab = "y_new", col = "lightgray")
abline(v = mean(y_pred_samples), col = "red", lwd = 2)

# 5.2 Regression predictive for x* = 30 (BMI=30 example)

xstar <- c(1, 30)   # intercept + covariate

# For each posterior beta and sigma2 sample compute predictive mean & draw

Kreg <- nrow(beta_samples)
yreg_pred <- numeric(Kreg)
for (k in 1:Kreg) {
  mu_pred_k <- sum(beta_samples[k, ] * xstar)

  # predictive variance: sigma2_k * (1 + xstar' (X'X)^{-1} xstar)
  pred_var_k <- sigma2_samples_reg[k] * (1 + t(xstar) %*% XtX_inv %*% xstar)
  yreg_pred[k] <- rnorm(1, mean = mu_pred_k, sd = sqrt(pred_var_k))
}
hist(yreg_pred, breaks = 40, main = "Posterior predictive for x*=30 (regression)", col="lightblue")
abline(v = mean(yreg_pred), col = "red", lwd = 2)

Gibbs sampler (Normal-Normal) + Hierarchical Beta-Binomial (Gibbs + MH)

with MCMC diagnostics (traceplots, ACF, ESS)

Run the entire script in R. Requires coda and ggplot2 (auto-installed if missing).

set.seed(2025)

# ---------- Package setup ----------
needed <- c("coda", "ggplot2")
to_install <- needed[!(needed %in% installed.packages()[, "Package"])]
if(length(to_install)) install.packages(to_install, repos="https://cloud.r-project.org")
library(coda)
## Warning: package 'coda' was built under R version 4.4.3
library(ggplot2)

# ===================================================================
# PART A — Gibbs sampler for Normal model (conjugate priors)
# Model:
#   y_i ~ Normal(mu, sigma2)
#   mu ~ Normal(mu0, tau2)   (tau2 = prior variance)
#   sigma2 ~ Inverse-Gamma(a0, b0)  (parameterization where density ~ b0^a0 / Gamma(a0) * sigma2^{-a0-1} * exp(-b0/sigma2))
# Gibbs updates: mu | sigma2, y  ~ Normal(...)
#                sigma2 | mu, y  ~ Inverse-Gamma(...)
# ===================================================================

# Simulate data
n <- 80
true_mu <- 2.0
true_sigma2 <- 1.5
y <- rnorm(n, mean = true_mu, sd = sqrt(true_sigma2))

# Prior hyperparameters
mu0 <- 0       # prior mean
tau2 <- 10     # prior var (large = vague)
a0 <- 2.0      # IG shape
b0 <- 1.0      # IG scale

# Gibbs sampler
gibbs_normal <- function(y, niter = 5000, burn = 1000) {
  n <- length(y)
  # storage
  mu_chain <- numeric(niter)
  sigma2_chain <- numeric(niter)
  # init
  mu_curr <- mean(y)
  sigma2_curr <- var(y)
  
  for (i in 1:niter) {
    # mu | sigma2, y  (Normal)
    mu_var <- 1 / (n / sigma2_curr + 1 / tau2)
    mu_mean <- mu_var * (sum(y) / sigma2_curr + mu0 / tau2)
    mu_curr <- rnorm(1, mean = mu_mean, sd = sqrt(mu_var))
    
    # sigma2 | mu, y  (Inverse-Gamma)
    a_n <- a0 + n/2
    ssq <- sum((y - mu_curr)^2)
    b_n <- b0 + 0.5 * ssq
    # draw from Inverse-Gamma by drawing from Gamma for 1/sigma2
    inv_sigma2 <- rgamma(1, shape = a_n, rate = b_n) # rate = b_n
    sigma2_curr <- 1 / inv_sigma2
    
    mu_chain[i] <- mu_curr
    sigma2_chain[i] <- sigma2_curr
  }
  
  res <- list(mu = mu_chain[-(1:burn)], sigma2 = sigma2_chain[-(1:burn)], raw_mu = mu_chain, raw_sigma2 = sigma2_chain)
  return(res)
}

# Run Gibbs for Normal model
gibbs_res <- gibbs_normal(y, niter = 8000, burn = 2000)

# Summaries
cat("== Normal model posterior summaries ==\n")
## == Normal model posterior summaries ==
cat("Posterior mean mu:", mean(gibbs_res$mu), "True mu:", true_mu, "\n")
## Posterior mean mu: 2.054085 True mu: 2
cat("Posterior mean sigma2:", mean(gibbs_res$sigma2), "True sigma2:", true_sigma2, "\n\n")
## Posterior mean sigma2: 1.5367 True sigma2: 1.5
# ===================================================================
# PART B — Hierarchical Beta-Binomial model (Gibbs for theta_j, MH for alpha,beta)
# Model:
#   For j = 1..J:
#     y_j ~ Binomial(n_j, theta_j)
#     theta_j ~ Beta(alpha, beta)
#   alpha ~ Gamma(a_alpha, b_alpha)
#   beta  ~ Gamma(a_beta,  b_beta)
# Gibbs steps:
#   theta_j | rest ~ Beta(alpha + y_j, beta + n_j - y_j)
#   alpha, beta | thetas: no closed form -> Metropolis steps on log(alpha), log(beta)
# This is a Metropolis-within-Gibbs sampler.
# ===================================================================

# Simulate hierarchical binomial data
J <- 20
true_alpha <- 2.5
true_beta  <- 5.0
set.seed(123)
theta_true <- rbeta(J, true_alpha, true_beta)
n_j <- sample(20:80, J, replace = TRUE)
y_j <- rbinom(J, n_j, theta_true)

# Hyperpriors for alpha and beta
# Use Gamma priors on alpha and beta (shape, rate)
a_alpha <- 1.0; b_alpha <- 0.1
a_beta  <- 1.0; b_beta  <- 0.1

# Log posterior for alpha, beta (given current theta's)
log_post_alpha_beta <- function(alpha, beta, thetas) {
  if(alpha <= 0 || beta <= 0) return(-Inf)
  J <- length(thetas)
  # log prior (Gamma(shape, rate))
  log_prior <- dgamma(alpha, shape = a_alpha, rate = b_alpha, log = TRUE) +
    dgamma(beta,  shape = a_beta,  rate = b_beta, log = TRUE)
  # log likelihood of thetas given alpha,beta: product Beta(thetas | alpha,beta)
  # log density for Beta: (alpha-1)*log(theta) + (beta-1)*log(1-theta) - lbeta(alpha,beta)
  sum_log_terms <- sum((alpha - 1) * log(thetas) + (beta - 1) * log(1 - thetas))
  log_norm_const <- - J * lbeta(alpha, beta)
  return(log_prior + sum_log_terms + log_norm_const)
}

# Metropolis-within-Gibbs sampler
mh_within_gibbs_beta_binomial <- function(y, n_j, niter = 10000, burn = 2000,
                                          prop_sd_log_alpha = 0.15, prop_sd_log_beta = 0.15,
                                          init_alpha = 1, init_beta = 1) {
  J <- length(y)
  # storage
  thetas_chain <- matrix(NA, nrow = niter, ncol = J)
  alpha_chain <- numeric(niter)
  beta_chain  <- numeric(niter)
  
  # initialize
  alpha_curr <- init_alpha
  beta_curr  <- init_beta
  # init thetas using posterior given init hyperparams
  theta_curr <- rbeta(J, alpha_curr + y, beta_curr + n_j - y)
  
  for (it in 1:niter) {
    # 1) Gibbs: sample each theta_j | rest
    for (j in 1:J) {
      theta_curr[j] <- rbeta(1, alpha_curr + y[j], beta_curr + n_j[j] - y[j])
      # avoid exact 0/1
      theta_curr[j] <- pmin(pmax(theta_curr[j], 1e-10), 1-1e-10)
    }
    
    # 2) Metropolis update for alpha (update log(alpha))
    log_alpha_prop <- log(alpha_curr) + rnorm(1, 0, prop_sd_log_alpha)
    alpha_prop <- exp(log_alpha_prop)
    log_r_alpha <- log_post_alpha_beta(alpha_prop, beta_curr, theta_curr) -
      log_post_alpha_beta(alpha_curr, beta_curr, theta_curr) +
      log_alpha_prop - log(alpha_curr) # Jacobian: the random walk is on log(alpha),
    # while the target density is on alpha, so the acceptance ratio needs the factor alpha_prop / alpha_curr
    # (i.e. + log(alpha_prop) - log(alpha_curr)); the same correction is applied for beta below.
    if (log(runif(1)) < log_r_alpha) alpha_curr <- alpha_prop
    
    # 3) Metropolis update for beta (update log(beta))
    log_beta_prop <- log(beta_curr) + rnorm(1, 0, prop_sd_log_beta)
    beta_prop <- exp(log_beta_prop)
    log_r_beta <- log_post_alpha_beta(alpha_curr, beta_prop, theta_curr) -
      log_post_alpha_beta(alpha_curr, beta_curr, theta_curr) +
      log_beta_prop - log(beta_curr)
    if (log(runif(1)) < log_r_beta) beta_curr <- beta_prop
    
    # store
    thetas_chain[it, ] <- theta_curr
    alpha_chain[it] <- alpha_curr
    beta_chain[it] <- beta_curr
  }
  
  # drop burn-in
  out <- list(thetas = thetas_chain[-(1:burn), , drop = FALSE],
              alpha  = alpha_chain[-(1:burn)],
              beta   = beta_chain[-(1:burn)],
              raw = list(thetas_raw = thetas_chain, alpha_raw = alpha_chain, beta_raw = beta_chain))
  return(out)
}

# Run hierarchical sampler
set.seed(2025)
hier_res <- mh_within_gibbs_beta_binomial(y_j, n_j, niter = 12000, burn = 3000,
                                          prop_sd_log_alpha = 0.12, prop_sd_log_beta = 0.12,
                                          init_alpha = 1.0, init_beta = 1.0)

# Summaries
cat("== Hierarchical Beta-Binomial posterior summaries ==\n")
## == Hierarchical Beta-Binomial posterior summaries ==
cat("True alpha:", true_alpha, "Posterior mean alpha:", mean(hier_res$alpha), "\n")
## True alpha: 2.5 Posterior mean alpha: 3.418426
cat("True beta:", true_beta, "Posterior mean beta:", mean(hier_res$beta), "\n\n")
## True beta: 5 Posterior mean beta: 6.765439
cat("Posterior mean of first 6 group thetas vs true thetas:\n")
## Posterior mean of first 6 group thetas vs true thetas:
print(data.frame(true = theta_true[1:6], post_mean = colMeans(hier_res$thetas)[1:6], y = y_j[1:6], n = n_j[1:6]))
##        true post_mean  y  n
## 1 0.2311690 0.2905453 17 60
## 2 0.2891404 0.3434913 10 29
## 3 0.7014177 0.6418564 30 42
## 4 0.3474979 0.3815572 18 46
## 5 0.3595317 0.3746800 30 79
## 6 0.4312335 0.4566386 34 72
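A useful visual check of the partial pooling is to plot the raw proportions y_j/n_j against the posterior means of theta_j; a minimal sketch reusing y_j, n_j and hier_res from above (points pulled off the 45-degree line toward the red line show the shrinkage induced by the hierarchical prior):

raw_prop   <- y_j / n_j
post_theta <- colMeans(hier_res$thetas)
pop_mean   <- mean(hier_res$alpha / (hier_res$alpha + hier_res$beta))  # posterior mean of alpha/(alpha+beta)
plot(raw_prop, post_theta, pch = 16,
     xlab = "Raw proportion y_j / n_j", ylab = "Posterior mean of theta_j",
     main = "Shrinkage of group estimates (Beta-Binomial)")
abline(0, 1, lty = 2)              # no-pooling reference line
abline(h = pop_mean, col = "red")  # estimated population mean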
# ===================================================================
# PART C — MCMC diagnostics
# - Traceplots
# - Autocorrelation (ACF)
# - Effective Sample Size (ESS)
# Use coda where convenient
# ===================================================================

# ---- 1) Normal Gibbs diagnostics ----
mu_mcmc <- mcmc(gibbs_res$raw_mu)        # include raw chain to inspect mixing & burn visually
sigma2_mcmc <- mcmc(gibbs_res$raw_sigma2)

# Traceplots
par(mfrow = c(2,1))
plot(mu_mcmc, main = "Traceplot: mu (Normal Gibbs)", ylab = "mu")

plot(sigma2_mcmc, main = "Traceplot: sigma2 (Normal Gibbs)", ylab = "sigma2")

# ACF
par(mfrow = c(2,1))
acf(as.numeric(gibbs_res$raw_mu), main = "ACF: mu (Normal Gibbs)")
acf(as.numeric(gibbs_res$raw_sigma2), main = "ACF: sigma2 (Normal Gibbs)")

# Effective sample size
ess_mu <- effectiveSize(gibbs_res$mu)
ess_sigma2 <- effectiveSize(gibbs_res$sigma2)
cat("\nNormal Gibbs ESS: mu =", round(ess_mu,2), ", sigma2 =", round(ess_sigma2,2), "\n")
## 
## Normal Gibbs ESS: mu = 6000 , sigma2 = 6000
# ---- 2) Hierarchical model diagnostics (alpha, beta, and a few thetas) ----
alpha_mcmc <- mcmc(hier_res$raw$alpha_raw)
beta_mcmc  <- mcmc(hier_res$raw$beta_raw)
theta1_mcmc <- mcmc(hier_res$raw$thetas_raw[,1])
theta2_mcmc <- mcmc(hier_res$raw$thetas_raw[,2])

# Traceplots (alpha & beta)
par(mfrow = c(2,1))
plot(alpha_mcmc, main="Traceplot: alpha (hierarchical raw)", ylab="alpha")

plot(beta_mcmc, main="Traceplot: beta (hierarchical raw)", ylab="beta")

# Traceplot for a couple of thetas
par(mfrow = c(2,1))
plot(theta1_mcmc, main="Traceplot: theta[1] (hierarchical raw)", ylab="theta1")

plot(theta2_mcmc, main="Traceplot: theta[2] (hierarchical raw)", ylab="theta2")

# ACF plots
par(mfrow = c(3,1))
acf(alpha_mcmc, main="ACF: alpha")
acf(beta_mcmc,  main="ACF: beta")
acf(theta1_mcmc, main="ACF: theta[1]")

# Effective sample sizes
ess_alpha <- effectiveSize(hier_res$alpha)
ess_beta  <- effectiveSize(hier_res$beta)
ess_theta1 <- effectiveSize(hier_res$thetas[,1])
cat("\nHierarchical model ESS:\n")
## 
## Hierarchical model ESS:
cat(" alpha:", round(ess_alpha,2), " beta:", round(ess_beta,2), 
    " theta[1]:", round(ess_theta1,2), "\n\n")
##  alpha: 47.68  beta: 52.75  theta[1]: 8221.41
# ---- 3) Quick diagnostics summary plots using ggplot (posterior densities) ----
# Convert to data frames for ggplot density overlays
df_norm <- data.frame(mu = gibbs_res$mu, sigma2 = gibbs_res$sigma2)
p1 <- ggplot(df_norm, aes(x = mu)) + geom_density() + ggtitle("Posterior density: mu (Normal Gibbs)")
p2 <- ggplot(df_norm, aes(x = sigma2)) + geom_density() + ggtitle("Posterior density: sigma2 (Normal Gibbs)")
print(p1); print(p2)

df_hier_ab <- data.frame(alpha = hier_res$alpha, beta = hier_res$beta)
p3 <- ggplot(df_hier_ab, aes(x = alpha)) + geom_density() + ggtitle("Posterior density: alpha (hierarchical)")
p4 <- ggplot(df_hier_ab, aes(x = beta)) + geom_density() + ggtitle("Posterior density: beta (hierarchical)")
print(p3); print(p4)

# ===================================================================
# Optional: Gelman-Rubin (R-hat) requires multiple chains.
# For quick check, run 2 short independent chains for alpha & beta and compute R-hat.
# ===================================================================
run_two_chains <- function() {
  res1 <- mh_within_gibbs_beta_binomial(y_j, n_j, niter = 6000, burn = 1500,
                                        prop_sd_log_alpha = 0.12, prop_sd_log_beta = 0.12,
                                        init_alpha = 0.8, init_beta = 0.8)
  res2 <- mh_within_gibbs_beta_binomial(y_j, n_j, niter = 6000, burn = 1500,
                                        prop_sd_log_alpha = 0.12, prop_sd_log_beta = 0.12,
                                        init_alpha = 3.0, init_beta = 6.0)
  # form mcmc.list
  m1 <- mcmc(res1$alpha)
  m2 <- mcmc(res2$alpha)
  mlist <- mcmc.list(m1, m2)
  gr_alpha <- gelman.diag(mlist, autoburnin = FALSE)
  return(gr_alpha)
}

# (Run if you want R-hat check; can be somewhat slow)
# gr_alpha <- run_two_chains()
# print(gr_alpha)

# Final note to user
cat("\nFinished. You now have:\n - Gibbs sampler for Normal model (closed form).\n - Hierarchical Beta-Binomial sampler (Gibbs for theta_j + MH for alpha/beta).\n - Traceplots, ACFs, and ESS computed for main parameters.\n\n")
## 
## Finished. You now have:
##  - Gibbs sampler for Normal model (closed form).
##  - Hierarchical Beta-Binomial sampler (Gibbs for theta_j + MH for alpha/beta).
##  - Traceplots, ACFs, and ESS computed for main parameters.