Below is a complete, ready-to-use R script you can apply to any dataset related to the examples in your attached Bayesian Statistics document. Since the PDF does not contain raw data, I provide three full R code templates (Binomial, Normal, Poisson) corresponding to the three core examples:
Stroke Study (Binomial – Beta/Binomial)
Dietary Study (Normal – Normal/Normal)
Caries Study (Poisson – Gamma/Poisson)
For each example, you get:
✅ Data simulation template (replace with your real data)
✅ Summary statistics
✅ Bayesian posterior computation
✅ Visualizations (histograms, likelihood, prior, posterior)
✅ Step-by-step explanation (notes)
### ---------------------------------------------
### BINOMIAL – BETA POSTERIOR (Stroke Example)
### ---------------------------------------------
# Example data (replace with your actual numbers)
n <- 50 # sample size
y <- 10 # number of SICH cases
# Summary statistics
prop_est <- y / n
prop_est
## [1] 0.2
# Plot: Likelihood for Binomial
theta <- seq(0, 0.5, length = 1000)
likelihood <- dbinom(y, n, theta)
plot(theta, likelihood, type="l", lwd=2,
xlab="Theta (SICH probability)", ylab="Likelihood",
main="Binomial Likelihood")
# Prior: Beta(9,93) from ECASS-2 study
alpha0 <- 9
beta0 <- 93
# Posterior parameters
alpha_post <- alpha0 + y
beta_post <- beta0 + (n - y)
alpha_post; beta_post
## [1] 19
## [1] 133
# Plot Prior vs Likelihood vs Posterior
prior <- dbeta(theta, alpha0, beta0)
posterior <- dbeta(theta, alpha_post, beta_post)
plot(theta, prior, type="l", col="blue", lwd=2, ylim=c(0, max(posterior)),
ylab="Density", xlab="Theta", main="Beta Prior, Likelihood, Posterior")
lines(theta, posterior, col="red", lwd=3)
legend("topright", legend=c("Prior Beta(9,93)", "Posterior Beta(19,133)"),
col=c("blue", "red"), lwd=2)
Notes
Use this when your response is binary (success/failure).
The posterior is:
θ | y ∼ Beta(α + y, β + n − y)
The plot shows how the posterior is pulled from the likelihood toward the prior (shrinkage).
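As a quick numerical check of that shrinkage, here is a minimal sketch using the values above: the posterior mean is a weighted compromise between the prior mean and the observed proportion.
# Posterior mean as a weighted average of prior mean and data proportion
w <- (alpha0 + beta0) / (alpha0 + beta0 + n)  # weight given to the prior
prior_mean <- alpha0 / (alpha0 + beta0)
w * prior_mean + (1 - w) * (y / n)            # equals alpha_post / (alpha_post + beta_post) = 0.125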
### ---------------------------------------------
### NORMAL – NORMAL POSTERIOR (Dietary Study)
### ---------------------------------------------
# Replace with your own data
chol <- rnorm(50, mean=318, sd=119.5) # Example dataset
summary(chol)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 133.3 271.9 356.2 358.6 419.6 665.4
# Sample parameters
ybar <- mean(chol)
s <- sd(chol)
n <- length(chol)
# Prior from IBBENS study (mean=328, SE=5.072)
mu0 <- 328
sigma0 <- 5.072
# Posterior mean and variance
post_mean <- ( (1/sigma0^2)*mu0 + (n/s^2)*ybar ) / ( (1/sigma0^2) + n/s^2 )
post_var <- 1 / ( (1/sigma0^2) + n/s^2 )
post_sd <- sqrt(post_var)
post_mean; post_sd
## [1] 330.4935
## [1] 4.860603
# Plot Prior, Likelihood, Posterior
mu <- seq(250, 400, length=200)
prior <- dnorm(mu, mu0, sigma0)
likelihood <- dnorm(mu, ybar, s/sqrt(n))
posterior <- dnorm(mu, post_mean, post_sd)
plot(mu, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, posterior)),
main="Normal Prior, Likelihood, Posterior",
xlab="Mean Cholesterol", ylab="Density")
lines(mu, likelihood, col="green", lwd=2)
lines(mu, posterior, col="red", lwd=2)
legend("topright", legend=c("Prior", "Likelihood", "Posterior"),
col=c("blue","green","red"), lwd=2)
Notes
This is suitable for continuous data with known (or large-sample) variance.
The posterior is:
μ | y ∼ Normal(μ_n, σ_n²), where μ_n = (μ0/σ0² + n·ȳ/σ²) / (1/σ0² + n/σ²) and σ_n² = 1 / (1/σ0² + n/σ²)
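A one-line follow-up, a minimal sketch using the posterior quantities computed above:
# 95% equal-tail credible interval for the mean cholesterol
qnorm(c(0.025, 0.975), mean = post_mean, sd = post_sd)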
### ---------------------------------------------
### POISSON – GAMMA POSTERIOR (Caries Study)
### ---------------------------------------------
# Replace with your actual dmft data
dmft <- rpois(4351, lambda = 2.24) # Example data
summary(dmft)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 2.000 2.259 3.000 9.000
mean(dmft)
## [1] 2.259251
# Sufficient statistics
y_sum <- sum(dmft)
n <- length(dmft)
# Prior Gamma(3,1)
alpha0 <- 3
beta0 <- 1
# Posterior parameters
alpha_post <- alpha0 + y_sum
beta_post <- beta0 + n
alpha_post; beta_post
## [1] 9833
## [1] 4352
# Plot Prior and Posterior
theta <- seq(0, 8, length=500)
prior <- dgamma(theta, alpha0, beta0)
posterior <- dgamma(theta, alpha_post, beta_post)
plot(theta, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, posterior)),
main="Gamma Prior and Posterior",
xlab="Mean dmft", ylab="Density")
lines(theta, posterior, col="red", lwd=2)
legend("topright", c("Prior Gamma(3,1)", "Posterior"),
col=c("blue","red"), lwd=2)
Notes
Use this for count data where the outcome is Poisson(θ).
The posterior is:
θ | y ∼ Gamma(α + Σyᵢ, β + n)
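Two quick extensions, a minimal sketch using the posterior above (the negative binomial is the standard Poisson–Gamma predictive):
# 95% credible interval for the mean dmft
qgamma(c(0.025, 0.975), alpha_post, beta_post)
# Posterior predictive draws for a new child's dmft count
y_pred <- rnbinom(1000, size = alpha_post, prob = beta_post / (beta_post + 1))
summary(y_pred)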
### ---------------------------------------------
### 1. BINOMIAL–BETA POSTERIOR (Stroke Example)
### ---------------------------------------------
# Model:
#   Y ~ Binomial(n, θ),  θ ~ Beta(α0, β0)
# Posterior:
#   θ | y ~ Beta(α0 + y, β0 + n - y)
# Summary measures:
#   Mean     = α_post / (α_post + β_post)
#   Mode     = (α_post - 1) / (α_post + β_post - 2)
#   Variance = α_post * β_post / [(α_post + β_post)^2 (α_post + β_post + 1)]
#   SD       = sqrt(Variance)
#   Median   = qbeta(0.5, α_post, β_post)
#   95% credible interval = qbeta(c(0.025, 0.975), α_post, β_post)
n <- 50   # sample size
y <- 10   # number of successes (SICH cases)
alpha0 <- 9
beta0 <- 93
# Posterior hyperparameters
alpha_post <- alpha0 + y
beta_post <- beta0 + (n - y)
# Posterior summary measures
post_mean <- alpha_post / (alpha_post + beta_post)
post_mode <- (alpha_post - 1) / (alpha_post + beta_post - 2)
post_var <- (alpha_post * beta_post) /
((alpha_post + beta_post)^2 * (alpha_post + beta_post + 1))
post_sd <- sqrt(post_var)
post_med <- qbeta(0.5, alpha_post, beta_post)
post_CI <- qbeta(c(0.025, 0.975), alpha_post, beta_post)
# Print Summary
list(
posterior_mean = post_mean,
posterior_mode = post_mode,
posterior_sd = post_sd,
median = post_med,
credible_interval_95 = post_CI
)
## $posterior_mean
## [1] 0.125
##
## $posterior_mode
## [1] 0.12
##
## $posterior_sd
## [1] 0.02673704
##
## $median
## [1] 0.1233544
##
## $credible_interval_95
## [1] 0.07748574 0.18184089
# Plot prior, likelihood, posterior
theta <- seq(0, 0.5, length = 2000)
# Likelihood scaled to overlay on the density plots
likelihood_raw <- dbinom(y, n, theta)
likelihood_scaled <- likelihood_raw / max(likelihood_raw)
prior <- dbeta(theta, alpha0, beta0)
posterior <- dbeta(theta, alpha_post, beta_post)
plot(theta, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, posterior)),
xlab="Theta", ylab="Density", main="Binomial–Beta: Prior, Likelihood, Posterior")
lines(theta, likelihood_scaled * max(prior), col="darkgreen", lwd=2, lty=2)
lines(theta, posterior, col="red", lwd=3)
legend("topright",
legend=c("Prior Beta(9,93)", "Scaled Likelihood",
paste0("Posterior Beta(",alpha_post,",",beta_post,")")),
col=c("blue","darkgreen","red"), lwd=2, lty=c(1,2,1))
### ---------------------------------------------
### 2. POISSON–GAMMA POSTERIOR (Caries Example)
### ---------------------------------------------
# Model:
#   Y_i ~ Poisson(λ),  λ ~ Gamma(α0, β0)
# Posterior:
#   λ | y ~ Gamma(α0 + Σy_i, β0 + n)
# Summary measures:
#   Mean     = α_post / β_post
#   Variance = α_post / β_post^2
#   SD       = sqrt(Variance)
#   Median   = qgamma(0.5, α_post, β_post)
#   95% credible interval = qgamma(c(0.025, 0.975), α_post, β_post)
# Example Caries Data from Chapter
y <- c(3,2,4)
n <- length(y)
sum_y <- sum(y)
# Prior
alpha0 <- 2
beta0 <- 1
# Posterior
alpha_post <- alpha0 + sum_y
beta_post <- beta0 + n
# Summary Measures
post_mean <- alpha_post / beta_post
post_var <- alpha_post / (beta_post^2)
post_sd <- sqrt(post_var)
post_med <- qgamma(0.5, alpha_post, beta_post)
post_CI <- qgamma(c(0.025,0.975), alpha_post, beta_post)
# Print results
list(
posterior_mean = post_mean,
posterior_sd = post_sd,
median = post_med,
credible_interval_95 = post_CI
)
## $posterior_mean
## [1] 2.75
##
## $posterior_sd
## [1] 0.8291562
##
## $median
## [1] 2.667131
##
## $credible_interval_95
## [1] 1.372790 4.597589
# Plot
theta <- seq(0,6, length=1000)
prior <- dgamma(theta, alpha0, beta0)
post <- dgamma(theta, alpha_post, beta_post)
plot(theta, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, post)),
main="Poisson–Gamma: Prior vs Posterior",
xlab="Mean count (λ)", ylab="Density")
lines(theta, post, col="red", lwd=2)
legend("topright",
legend=c("Prior Gamma(2,1)",
paste0("Posterior Gamma(",alpha_post,",",beta_post,")")),
col=c("blue","red"), lwd=2)
### ---------------------------------------------
### 3. NORMAL–NORMAL POSTERIOR (Dietary Example)
### ---------------------------------------------
# Model:
#   y_i ~ Normal(μ, σ²),  μ ~ Normal(μ0, σ0²)
# Posterior:
#   μ | y ~ Normal(μ_n, σ_n²)
# Formulas:
#   μ_n  = (μ0/σ0² + n·ȳ/σ²) / (1/σ0² + n/σ²)
#   σ_n² = 1 / (1/σ0² + n/σ²)
# Example dietary data
set.seed(99)
chol <- rnorm(50, mean=328, sd=20)
n <- length(chol)
ybar <- mean(chol)
s2 <- var(chol)
# Prior from chapter
mu0 <- 328
sigma0 <- 5.072
# Posterior
post_mean <- (mu0/sigma0^2 + n*ybar/s2) / (1/sigma0^2 + n/s2)
post_sd <- sqrt(1 / (1/sigma0^2 + n/s2))
CI_95 <- c(post_mean - 1.96*post_sd, post_mean + 1.96*post_sd)
list(
posterior_mean = post_mean,
posterior_sd = post_sd,
credible_interval_95 = CI_95
)
## $posterior_mean
## [1] 324.1855
##
## $posterior_sd
## [1] 2.507436
##
## $credible_interval_95
## [1] 319.2710 329.1001
# Plot Prior, Likelihood, Posterior
mu <- seq(250,400,length=500)
prior <- dnorm(mu, mu0, sigma0)
lik <- dnorm(mu, ybar, sqrt(s2/n))
post <- dnorm(mu, post_mean, post_sd)
plot(mu, prior, type="l", col="blue", lwd=2, ylim=c(0, max(prior, lik, post)),
main="Normal–Normal: Prior, Likelihood & Posterior",
xlab="Mean Cholesterol", ylab="Density")
lines(mu, lik, col="darkgreen", lwd=2, lty=2)
lines(mu, post, col="red", lwd=3)
legend("topright", legend=c("Prior", "Likelihood", "Posterior"),
col=c("blue","darkgreen","red"), lwd=2, lty=c(1,2,1))
## ------------------------------------------------------------
## 1. ONE-SAMPLE BAYESIAN ANALYSIS FOR NORMAL MEAN (μ)
## ------------------------------------------------------------
## ------------------------------------------------------------
## Example dataset (replace with your own sample)
## ------------------------------------------------------------
set.seed(123)
y <- rnorm(20, mean = 120, sd = 15)
n <- length(y)
## Summary statistics
sample_mean <- mean(y)
sample_sd <- sd(y)
sample_var <- var(y)
summary(y)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 90.5 112.6 121.8 122.1 128.2 146.8
## Plot: Histogram + sample mean
hist(y, main = "Histogram of the Sample", col="lightgray", breaks=10)
abline(v = sample_mean, col="red", lwd=2)
## ------------------------------------------------------------
## Bayesian Conjugate Prior:
## μ ~ Normal(μ0, τ0²)
## y | μ ~ Normal(μ, σ²)
##
## Posterior:
## μ | y ~ Normal( μn , τn² )
##
## Formulas:
## τn² = 1 / (1/τ0² + n/σ²)
## μn = τn² * ( μ0/τ0² + n*ȳ/σ² )
## ------------------------------------------------------------
mu0 <- 110 # prior mean
tau0 <- 10 # prior SD
sigma <- sample_sd # using sample SD as known variance
# Posterior variance and posterior mean
tau_n2 <- 1 / (1/tau0^2 + n/sigma^2)
tau_n <- sqrt(tau_n2)
mu_n <- tau_n2 * (mu0/tau0^2 + n*sample_mean/sigma^2)
# Posterior print
list(
posterior_mean = mu_n,
posterior_sd = tau_n,
CI_95 = c(mu_n - 1.96*tau_n, mu_n + 1.96*tau_n)
)
## $posterior_mean
## [1] 120.9581
##
## $posterior_sd
## [1] 3.101537
##
## $CI_95
## [1] 114.8790 127.0371
## Plot Prior vs Likelihood vs Posterior
mu_grid <- seq(sample_mean - 40, sample_mean + 40, length=400)
prior <- dnorm(mu_grid, mu0, tau0)
likelihood <- dnorm(mu_grid, sample_mean, sigma/sqrt(n))
posterior <- dnorm(mu_grid, mu_n, tau_n)
plot(mu_grid, prior, type="l", col="blue", lwd=2,
ylim=c(0, max(prior, likelihood, posterior)),
main="Prior, Likelihood & Posterior", xlab="μ", ylab="Density")
lines(mu_grid, likelihood, col="darkgreen", lwd=2, lty=2)
lines(mu_grid, posterior, col="red", lwd=2)
legend("topright", c("Prior","Likelihood","Posterior"),
col=c("blue","darkgreen","red"), lwd=2, lty=c(1,2,1))
## ------------------------------------------------------------
## 2. TWO-SAMPLE BAYESIAN COMPARISON (μ₁ − μ₂)
## ------------------------------------------------------------
## ------------------------------------------------------------
## Example data (replace these with your two groups)
## ------------------------------------------------------------
set.seed(321)
group1 <- rnorm(25, mean = 130, sd = 12)
group2 <- rnorm(22, mean = 118, sd = 14)
n1 <- length(group1)
n2 <- length(group2)
mean1 <- mean(group1)
mean2 <- mean(group2)
var1 <- var(group1)
var2 <- var(group2)
## Summary statistics
list(
group1_mean = mean1,
group2_mean = mean2,
difference = mean1 - mean2,
group1_sd = sqrt(var1),
group2_sd = sqrt(var2)
)
## $group1_mean
## [1] 132.8133
##
## $group2_mean
## [1] 116.0981
##
## $difference
## [1] 16.71524
##
## $group1_sd
## [1] 10.39231
##
## $group2_sd
## [1] 14.37212
## Plot: Side-by-side boxplots
boxplot(group1, group2,
names=c("Group 1", "Group 2"),
col=c("lightblue","lightpink"),
main="Boxplot of Two Groups")
## ------------------------------------------------------------
## Bayesian model:
## μ₁, μ₂ have independent Normal priors
## Posterior for difference:
## (μ₁ – μ₂) | data ~ Normal( mean_diff_post , var_diff_post )
##
##  Posterior means (same conjugate update as the one-sample case):
##    μ1_n = τ1_n² · (μ01/τ01² + n1·ȳ1/σ1²)
##    μ2_n = τ2_n² · (μ02/τ02² + n2·ȳ2/σ2²)
##    diff_post_mean = μ1_n − μ2_n
##
##  Posterior variance (independent posteriors):
##    var_diff_post = τ1_n² + τ2_n²
## ------------------------------------------------------------
# Priors
mu01 <- 125; tau01 <- 10
mu02 <- 125; tau02 <- 10
# "Known" variances taken from sample variances
sigma1 <- sqrt(var1)
sigma2 <- sqrt(var2)
# Posterior variances
tau1_n2 <- 1 / (1/tau01^2 + n1/sigma1^2)
tau2_n2 <- 1 / (1/tau02^2 + n2/sigma2^2)
# Posterior means
mu1_n <- tau1_n2 * (mu01/tau01^2 + n1*mean1/sigma1^2)
mu2_n <- tau2_n2 * (mu02/tau02^2 + n2*mean2/sigma2^2)
# Posterior for difference
diff_mean_post <- mu1_n - mu2_n
diff_var_post <- tau1_n2 + tau2_n2
diff_sd_post <- sqrt(diff_var_post)
list(
posterior_mean_difference = diff_mean_post,
posterior_sd = diff_sd_post,
CI_95 = c(diff_mean_post - 1.96*diff_sd_post,
diff_mean_post + 1.96*diff_sd_post)
)
## $posterior_mean_difference
## [1] 15.62761
##
## $posterior_sd
## [1] 3.567104
##
## $CI_95
## [1] 8.63609 22.61914
## Plot posterior of μ₁ – μ₂
dgrid <- seq(diff_mean_post - 40, diff_mean_post + 40, length=500)
post_diff <- dnorm(dgrid, diff_mean_post, diff_sd_post)
plot(dgrid, post_diff, type="l", col="purple", lwd=2,
main="Posterior Distribution of (μ₁ – μ₂)",
xlab="Difference in Means", ylab="Posterior Density")
abline(v = diff_mean_post, col="red", lwd=2)
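The key quantity in practice is usually P(μ₁ > μ₂ | data), which is immediate from this normal posterior:
# Posterior probability that group 1's mean exceeds group 2's
1 - pnorm(0, mean = diff_mean_post, sd = diff_sd_post)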
## ------------------------------------------------------------
## 3. POSTERIOR PREDICTIVE DISTRIBUTION
## ------------------------------------------------------------
## Posterior predictive distribution:
## y_new | data ~ Normal( μ_n , σ² + τ_n² )
post_pred_mean <- mu_n
post_pred_sd <- sqrt(sigma^2 + tau_n^2)
# Draw 1000 posterior predictive samples
y_new <- rnorm(1000, post_pred_mean, post_pred_sd)
hist(y_new, col="lightgray",
main="Posterior Predictive Distribution",
xlab="Predicted Observation")
abline(v = post_pred_mean, col="red", lwd=2)
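A 95% predictive interval follows directly from the normal form above (the simulated y_new could also be summarized with quantile()):
# 95% posterior predictive interval for a new observation
qnorm(c(0.025, 0.975), post_pred_mean, post_pred_sd)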
# Simulate example data (replace with your own vector y)
n <- 30
# Simulate sample from N(120, 15^2)
y <- rnorm(n, mean = 120, sd = 15)
# Basic sample summaries
# sample mean: \bar{y} = (1/n) \sum y_i
ybar <- mean(y) # sample mean
s <- sd(y) # sample standard deviation (sqrt(sample variance))
s2 <- var(y) # sample variance
# Print quick summaries
cat("n =", n, "\n")
## n = 30
cat("sample mean =", round(ybar,3), "sample SD =", round(s,3), "\n\n")
## sample mean = 119.773 sample SD = 13.265
# ---------------------------
# Analytic Normal-Normal posterior (treat sigma as known = sample sd)
# ---------------------------
# Prior for mu
mu0 <- 110 # prior mean (choose)
tau0 <- 10 # prior sd (choose)
# Treat sigma (observation SD) as known for conjugacy
sigma <- s # NOTE: this is an approximation; better to use composition for unknown sigma
# Posterior variance (tau_n^2) and mean (mu_n)
tau_n2 <- 1 / (1/tau0^2 + n / sigma^2) # posterior variance of mu
tau_n <- sqrt(tau_n2) # posterior sd
mu_n <- tau_n2 * (mu0 / tau0^2 + n * ybar / sigma^2) # posterior mean
# Show posterior summary
cat("Conjugate Normal-Normal posterior (approx treating sigma known):\n")
## Conjugate Normal-Normal posterior (approx treating sigma known):
cat(" posterior mean =", round(mu_n,4), "posterior sd =", round(tau_n,4), "\n\n")
## posterior mean = 119.2317 posterior sd = 2.3537
# ---------------------------
# Composition sampling: unknown sigma (noninformative prior p(mu,sigma^2) \propto sigma^{-2})
# Steps:
# 1) draw sigma^2_k = (n-1)*s^2 / chi2_{n-1}
# 2) draw mu_k ~ Normal(ybar, sigma^2_k / n)
# ---------------------------
K <- 5000 # number of posterior samples
mu_samples <- numeric(K)
sigma2_samples <- numeric(K)
for (k in 1:K) {
# draw from chi-square with df = n-1
chi_val <- rchisq(1, df = n - 1)
# inverse-chi-square draw for sigma^2 (scale form)
sigma2_k <- (n - 1) * s2 / chi_val
sigma2_samples[k] <- sigma2_k
# conditional draw for mu
mu_samples[k] <- rnorm(1, mean = ybar, sd = sqrt(sigma2_k / n))
}
# Summaries from composition sampling
# posterior mean of mu, posterior sd, credible interval
mu_post_mean <- mean(mu_samples)
mu_post_sd <- sd(mu_samples)
mu_post_CI <- quantile(mu_samples, probs = c(0.025, 0.975))
cat("Composition sampling results for mu:\n")
## Composition sampling results for mu:
cat(" mean =", round(mu_post_mean,4), " sd =", round(mu_post_sd,4),
" 95% CI =", round(mu_post_CI[1],4), "-", round(mu_post_CI[2],4), "\n\n")
## mean = 119.7804 sd = 2.5561 95% CI = 114.7432 - 124.8056
# Diagnostics: traceplot and density
par(mfrow = c(2,1))
plot(mu_samples, type = "l", main = "Traceplot: mu samples (composition)", ylab = "mu")
hist(mu_samples, breaks = 40, main = "Posterior density of mu (composition)", xlab = "mu")
par(mfrow = c(1,1))
Notes (inline & interpretation):
The composition sampler generates joint draws from the exact posterior under the noninformative prior.
Posterior mean of μ from composition should be close to sample mean. Posterior spread accounts for uncertainty in σ².
Use the composition samples to compute any marginal summaries, HPD, or predictive draws.
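For the HPD interval mentioned above, a minimal sketch (shortest interval containing 95% of the sorted draws; hpd_95 is a hypothetical helper, not from the chapter):
# Shortest 95% interval from posterior samples
hpd_95 <- function(x, p = 0.95) {
  xs <- sort(x)
  m <- floor(p * length(xs))                          # points spanned by the interval
  widths <- xs[(m + 1):length(xs)] - xs[1:(length(xs) - m)]
  i <- which.min(widths)                              # left endpoint of the narrowest window
  c(xs[i], xs[i + m])
}
hpd_95(mu_samples)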
# ---------------------------
# Simulate two independent groups (replace with your real data)
# ---------------------------
set.seed(2026)
group1 <- rnorm(28, mean = 130, sd = 12) # group 1 data
group2 <- rnorm(24, mean = 118, sd = 14) # group 2 data
# Basic summaries
n1 <- length(group1); n2 <- length(group2)
ybar1 <- mean(group1); ybar2 <- mean(group2)
s1 <- sd(group1); s2 <- sd(group2)
cat("Group1: n=", n1, " mean=", round(ybar1,3), " sd=", round(s1,3), "\n")
## Group1: n= 28 mean= 128.053 sd= 12.824
cat("Group2: n=", n2, " mean=", round(ybar2,3), " sd=", round(s2,3), "\n\n")
## Group2: n= 24 mean= 119.373 sd= 11.783
# ---------------------------
# Conjugate Normal-Normal approach (approx: treat sigma_j as known = sample sd)
# Priors for mu1 and mu2 (choose weakly informative)
# ---------------------------
mu01 <- 120; tau01 <- 20 # prior mean and sd for mu1
mu02 <- 120; tau02 <- 20 # prior mean and sd for mu2
# Use sample SDs as plug-in observation SD (approximation)
sigma1 <- s1
sigma2 <- s2
# Posterior variance and means for each mu_j
tau1n2 <- 1 / (1 / tau01^2 + n1 / sigma1^2)
tau2n2 <- 1 / (1 / tau02^2 + n2 / sigma2^2)
mu1n <- tau1n2 * (mu01 / tau01^2 + n1 * ybar1 / sigma1^2)
mu2n <- tau2n2 * (mu02 / tau02^2 + n2 * ybar2 / sigma2^2)
# Posterior for difference mu1 - mu2
diff_mean <- mu1n - mu2n
diff_var <- tau1n2 + tau2n2
diff_sd <- sqrt(diff_var)
diff_CI <- c(diff_mean - 1.96 * diff_sd, diff_mean + 1.96 * diff_sd)
list(
posterior_mean_diff = diff_mean,
posterior_sd_diff = diff_sd,
CI_95 = diff_CI
)
## $posterior_mean_diff
## [1] 8.553858
##
## $posterior_sd_diff
## [1] 3.389847
##
## $CI_95
## [1] 1.909757 15.197959
# Plot posterior density of difference
xgrid <- seq(diff_mean - 4*diff_sd, diff_mean + 4*diff_sd, length.out = 400)
plot(xgrid, dnorm(xgrid, diff_mean, diff_sd), type="l", col="purple", lwd=2,
main="Posterior (approx) of μ1 − μ2", xlab="μ1 − μ2")
abline(v = diff_mean, col="red", lwd=2)
Notes & cautions:
The plug-in approach (using sample SDs as known) is an approximation. For full Bayesian inference with unknown σ₁² and σ₂², use composition sampling or a hierarchical model: sample the σ²'s first, then the μ's conditionally.
If you prefer exact composition: sample σ1² ~ Inv-χ²(n1−1, s1²) and σ2² similarly, then sample μ1 and μ2 conditional on each σ², and compute their difference per draw to form the posterior; a minimal sketch follows.
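# Composition sampling for mu1 - mu2 with unknown variances (sketch, reusing the summaries above)
Kd <- 5000
sigma1_2 <- (n1 - 1) * s1^2 / rchisq(Kd, df = n1 - 1)  # sigma1^2 ~ Inv-chi^2(n1-1, s1^2)
sigma2_2 <- (n2 - 1) * s2^2 / rchisq(Kd, df = n2 - 1)  # sigma2^2 ~ Inv-chi^2(n2-1, s2^2)
mu1_draws <- rnorm(Kd, ybar1, sqrt(sigma1_2 / n1))     # mu1 | sigma1^2
mu2_draws <- rnorm(Kd, ybar2, sqrt(sigma2_2 / n2))     # mu2 | sigma2^2
diff_draws <- mu1_draws - mu2_draws
quantile(diff_draws, probs = c(0.025, 0.975))          # exact 95% credible interval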
# Simulated categorical data (replace with real counts)
counts <- c(180, 41, 216, 64) # example 2x2 table flattened or counts per cell
n_cat <- sum(counts)
# Prior Dirichlet hyperparameters (uninformative = 1 for each category)
alpha <- rep(1, length(counts))
# Posterior Dirichlet parameters
alpha_post <- alpha + counts
# Posterior means
theta_post_mean <- alpha_post / sum(alpha_post)
list(
counts = counts,
prior_alpha = alpha,
posterior_alpha = alpha_post,
posterior_mean = round(theta_post_mean,4)
)
## $counts
## [1] 180 41 216 64
##
## $prior_alpha
## [1] 1 1 1 1
##
## $posterior_alpha
## [1] 181 42 217 65
##
## $posterior_mean
## [1] 0.3584 0.0832 0.4297 0.1287
# Plot prior vs posterior (barplots)
par(mfrow=c(1,2))
barplot(alpha/sum(alpha), main="Prior mean (Dirichlet)", ylim=c(0,1))
barplot(theta_post_mean, main="Posterior mean (Dirichlet)", ylim=c(0,1))
par(mfrow=c(1,1))
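Posterior uncertainty for each category probability can be simulated without extra packages; a sketch using the Gamma representation of the Dirichlet:
# Dirichlet(a) draws = independent Gamma(a_i, 1) draws, normalized by their row sums
Kdir <- 4000
g <- matrix(rgamma(Kdir * length(alpha_post), shape = rep(alpha_post, each = Kdir)),
            nrow = Kdir)
theta_draws <- g / rowSums(g)
# 95% credible interval per category
round(t(apply(theta_draws, 2, quantile, probs = c(0.025, 0.975))), 4)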
library(MASS) # needed for mvrnorm() below
## Warning: package 'MASS' was built under R version 4.4.3
# ---------------------------
# Simulate a simple regression dataset (replace with your real X, y)
# ---------------------------
set.seed(2027)
n <- 120
x <- runif(n, 18, 40) # predictor (e.g., BMI, age...)
beta0_true <- 0.8
beta1_true <- 0.04
sigma_true <- 0.28
y_reg <- beta0_true + beta1_true * x + rnorm(n, sd = sigma_true)
# Build design matrix with intercept
X <- cbind(1, x)
d <- ncol(X) - 1 # number of predictors excluding intercept
# Frequentist OLS estimates (for posterior center)
XtX_inv <- solve(t(X) %*% X)
beta_hat <- XtX_inv %*% t(X) %*% y_reg
resid <- y_reg - X %*% beta_hat
s2 <- sum(resid^2) / (n - d - 1) # residual variance estimator (s^2)
# Print OLS summaries
cat("OLS beta_hat:\n"); print(round(beta_hat,4))
## OLS beta_hat:
## [,1]
## 0.6623
## x 0.0441
cat("Residual s^2 =", round(s2,5), "\n\n")
## Residual s^2 = 0.06706
# ---------------------------
# Composition sampling
# ---------------------------
K <- 4000
sigma2_samples_reg <- numeric(K)
beta_samples <- matrix(NA, nrow = K, ncol = length(beta_hat))
nu <- n - d - 1 # degrees of freedom for Inv-chi-square
for (k in 1:K) {
# sample sigma^2 from Inv-chi-square(n-d-1, s2)
chi_val <- rchisq(1, df = nu)
sigma2_k <- nu * s2 / chi_val
sigma2_samples_reg[k] <- sigma2_k
# conditional sample for beta ~ N(beta_hat, sigma2_k * (X'X)^{-1})
beta_samples[k, ] <- mvrnorm(1, mu = as.vector(beta_hat), Sigma = sigma2_k * XtX_inv)
}
# Posterior summaries for beta
beta_post_mean <- colMeans(beta_samples)
beta_post_sd <- apply(beta_samples, 2, sd)
names(beta_post_mean) <- c("beta0","beta1")
list(
posterior_mean_beta = round(beta_post_mean,4),
posterior_sd_beta = round(beta_post_sd,4)
)
## $posterior_mean_beta
## beta0 beta1
## 0.6613 0.0441
##
## $posterior_sd_beta
## [1] 0.1151 0.0038
# Joint posterior plot: scatter of beta0 vs beta1
plot(beta_samples[,1], beta_samples[,2], pch=16, cex=0.4,
main="Joint posterior samples: beta0 vs beta1",
xlab="beta0", ylab="beta1")
abline(v = beta_hat[1], col="blue", lwd=2) # frequentist center
abline(h = beta_hat[2], col="blue", lwd=2)
# 5.1 One-sample predictive using composition draws (from above mu_samples, sigma2_samples)
# Draw 1000 predictive observations using the K composition draws for the one-sample example
Kpp <- 1000
idx <- sample(1:K, size = Kpp, replace = TRUE) # sample indices from previous composition draws
y_pred_samples <- rnorm(Kpp, mean = mu_samples[idx], sd = sqrt(sigma2_samples[idx]))
# Plot predictive histogram
hist(y_pred_samples, breaks = 30, main = "Posterior predictive (one-sample)", xlab = "y_new", col = "lightgray")
abline(v = mean(y_pred_samples), col = "red", lwd = 2)
# 5.2 Regression predictive for x* = 30 (BMI=30 example)
xstar <- c(1, 30) # intercept + covariate
# For each posterior beta and sigma2 sample compute predictive mean & draw
Kreg <- nrow(beta_samples)
yreg_pred <- numeric(Kreg)
for (k in 1:Kreg) {
mu_pred_k <- sum(beta_samples[k, ] * xstar)
# predictive variance: sigma2_k * (1 + xstar' (X'X)^{-1} xstar)
pred_var_k <- sigma2_samples_reg[k] * (1 + t(xstar) %*% XtX_inv %*% xstar)
yreg_pred[k] <- rnorm(1, mean = mu_pred_k, sd = sqrt(pred_var_k))
}
hist(yreg_pred, breaks = 40, main = "Posterior predictive for x*=30 (regression)", col="lightblue")
abline(v = mean(yreg_pred), col = "red", lwd = 2)
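The predictive draws also give an interval directly; a one-line sketch:
# 95% posterior predictive interval for y at x* = 30
quantile(yreg_pred, probs = c(0.025, 0.975))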
set.seed(2025)
# ---------- Package setup ----------
needed <- c("coda", "ggplot2")
to_install <- needed[!(needed %in% installed.packages()[, "Package"])]
if(length(to_install)) install.packages(to_install, repos="https://cloud.r-project.org")
library(coda)
## Warning: package 'coda' was built under R version 4.4.3
library(ggplot2)
# ===================================================================
# PART A — Gibbs sampler for Normal model (conjugate priors)
# Model:
# y_i ~ Normal(mu, sigma2)
# mu ~ Normal(mu0, tau2) (tau2 = prior variance)
#   sigma2 ~ Inverse-Gamma(a0, b0)
#     (parameterization: density ∝ b0^a0 / Γ(a0) · sigma2^{-(a0+1)} · exp(-b0/sigma2))
# Gibbs updates: mu | sigma2, y ~ Normal(...)
# sigma2 | mu, y ~ Inverse-Gamma(...)
# ===================================================================
# Simulate data
n <- 80
true_mu <- 2.0
true_sigma2 <- 1.5
y <- rnorm(n, mean = true_mu, sd = sqrt(true_sigma2))
# Prior hyperparameters
mu0 <- 0 # prior mean
tau2 <- 10 # prior var (large = vague)
a0 <- 2.0 # IG shape
b0 <- 1.0 # IG scale
# Gibbs sampler
gibbs_normal <- function(y, niter = 5000, burn = 1000) {
n <- length(y)
# storage
mu_chain <- numeric(niter)
sigma2_chain <- numeric(niter)
# init
mu_curr <- mean(y)
sigma2_curr <- var(y)
for (i in 1:niter) {
# mu | sigma2, y (Normal)
mu_var <- 1 / (n / sigma2_curr + 1 / tau2)
mu_mean <- mu_var * (sum(y) / sigma2_curr + mu0 / tau2)
mu_curr <- rnorm(1, mean = mu_mean, sd = sqrt(mu_var))
# sigma2 | mu, y (Inverse-Gamma)
a_n <- a0 + n/2
ssq <- sum((y - mu_curr)^2)
b_n <- b0 + 0.5 * ssq
# draw from Inverse-Gamma by drawing from Gamma for 1/sigma2
inv_sigma2 <- rgamma(1, shape = a_n, rate = b_n) # rate = b_n
sigma2_curr <- 1 / inv_sigma2
mu_chain[i] <- mu_curr
sigma2_chain[i] <- sigma2_curr
}
res <- list(mu = mu_chain[-(1:burn)], sigma2 = sigma2_chain[-(1:burn)], raw_mu = mu_chain, raw_sigma2 = sigma2_chain)
return(res)
}
# Run Gibbs for Normal model
gibbs_res <- gibbs_normal(y, niter = 8000, burn = 2000)
# Summaries
cat("== Normal model posterior summaries ==\n")
## == Normal model posterior summaries ==
cat("Posterior mean mu:", mean(gibbs_res$mu), "True mu:", true_mu, "\n")
## Posterior mean mu: 2.054085 True mu: 2
cat("Posterior mean sigma2:", mean(gibbs_res$sigma2), "True sigma2:", true_sigma2, "\n\n")
## Posterior mean sigma2: 1.5367 True sigma2: 1.5
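Equal-tail credible intervals follow directly from the retained draws; a minimal sketch:
# 95% credible intervals from the Gibbs output
quantile(gibbs_res$mu, probs = c(0.025, 0.975))
quantile(gibbs_res$sigma2, probs = c(0.025, 0.975))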
# ===================================================================
# PART B — Hierarchical Beta-Binomial model (Gibbs for theta_j, MH for alpha,beta)
# Model:
# For j = 1..J:
# y_j ~ Binomial(n_j, theta_j)
# theta_j ~ Beta(alpha, beta)
# alpha ~ Gamma(a_alpha, b_alpha)
# beta ~ Gamma(a_beta, b_beta)
# Gibbs steps:
# theta_j | rest ~ Beta(alpha + y_j, beta + n_j - y_j)
# alpha, beta | thetas: no closed form -> Metropolis steps on log(alpha), log(beta)
# This is a Metropolis-within-Gibbs sampler.
# ===================================================================
# Simulate hierarchical binomial data
J <- 20
true_alpha <- 2.5
true_beta <- 5.0
set.seed(123)
theta_true <- rbeta(J, true_alpha, true_beta)
n_j <- sample(20:80, J, replace = TRUE)
y_j <- rbinom(J, n_j, theta_true)
# Hyperpriors for alpha and beta
# Use Gamma priors on alpha and beta (shape, rate)
a_alpha <- 1.0; b_alpha <- 0.1
a_beta <- 1.0; b_beta <- 0.1
# Log posterior for alpha, beta (given current theta's)
log_post_alpha_beta <- function(alpha, beta, thetas) {
if(alpha <= 0 || beta <= 0) return(-Inf)
J <- length(thetas)
# log prior (Gamma(shape, rate))
log_prior <- dgamma(alpha, shape = a_alpha, rate = b_alpha, log = TRUE) +
dgamma(beta, shape = a_beta, rate = b_beta, log = TRUE)
# log likelihood of thetas given alpha,beta: product Beta(thetas | alpha,beta)
# log density for Beta: (alpha-1)*log(theta) + (beta-1)*log(1-theta) - lbeta(alpha,beta)
sum_log_terms <- sum((alpha - 1) * log(thetas) + (beta - 1) * log(1 - thetas))
log_norm_const <- - J * lbeta(alpha, beta)
return(log_prior + sum_log_terms + log_norm_const)
}
# Metropolis-within-Gibbs sampler
mh_within_gibbs_beta_binomial <- function(y, n_j, niter = 10000, burn = 2000,
prop_sd_log_alpha = 0.15, prop_sd_log_beta = 0.15,
init_alpha = 1, init_beta = 1) {
J <- length(y)
# storage
thetas_chain <- matrix(NA, nrow = niter, ncol = J)
alpha_chain <- numeric(niter)
beta_chain <- numeric(niter)
# initialize
alpha_curr <- init_alpha
beta_curr <- init_beta
# init thetas using posterior given init hyperparams
theta_curr <- rbeta(J, alpha_curr + y, beta_curr + n_j - y)
for (it in 1:niter) {
# 1) Gibbs: sample each theta_j | rest
for (j in 1:J) {
theta_curr[j] <- rbeta(1, alpha_curr + y[j], beta_curr + n_j[j] - y[j])
# avoid exact 0/1
theta_curr[j] <- pmin(pmax(theta_curr[j], 1e-10), 1-1e-10)
}
# 2) Metropolis update for alpha (update log(alpha))
log_alpha_prop <- log(alpha_curr) + rnorm(1, 0, prop_sd_log_alpha)
alpha_prop <- exp(log_alpha_prop)
log_r_alpha <- log_post_alpha_beta(alpha_prop, beta_curr, theta_curr) -
log_post_alpha_beta(alpha_curr, beta_curr, theta_curr) +
log_alpha_prop - log(alpha_curr) # Jacobian for the log-scale random walk
# The proposal is symmetric in log(alpha) but not in alpha itself, so the
# Jacobian term log(alpha') - log(alpha) is required for a correct acceptance ratio.
if (log(runif(1)) < log_r_alpha) alpha_curr <- alpha_prop
# 3) Metropolis update for beta (update log(beta))
log_beta_prop <- log(beta_curr) + rnorm(1, 0, prop_sd_log_beta)
beta_prop <- exp(log_beta_prop)
log_r_beta <- log_post_alpha_beta(alpha_curr, beta_prop, theta_curr) -
log_post_alpha_beta(alpha_curr, beta_curr, theta_curr) +
log_beta_prop - log(beta_curr)
if (log(runif(1)) < log_r_beta) beta_curr <- beta_prop
# store
thetas_chain[it, ] <- theta_curr
alpha_chain[it] <- alpha_curr
beta_chain[it] <- beta_curr
}
# drop burn-in
out <- list(thetas = thetas_chain[-(1:burn), , drop = FALSE],
alpha = alpha_chain[-(1:burn)],
beta = beta_chain[-(1:burn)],
raw = list(thetas_raw = thetas_chain, alpha_raw = alpha_chain, beta_raw = beta_chain))
return(out)
}
# Run hierarchical sampler
set.seed(2025)
hier_res <- mh_within_gibbs_beta_binomial(y_j, n_j, niter = 12000, burn = 3000,
prop_sd_log_alpha = 0.12, prop_sd_log_beta = 0.12,
init_alpha = 1.0, init_beta = 1.0)
# Summaries
cat("== Hierarchical Beta-Binomial posterior summaries ==\n")
## == Hierarchical Beta-Binomial posterior summaries ==
cat("True alpha:", true_alpha, "Posterior mean alpha:", mean(hier_res$alpha), "\n")
## True alpha: 2.5 Posterior mean alpha: 3.418426
cat("True beta:", true_beta, "Posterior mean beta:", mean(hier_res$beta), "\n\n")
## True beta: 5 Posterior mean beta: 6.765439
cat("Posterior mean of first 6 group thetas vs true thetas:\n")
## Posterior mean of first 6 group thetas vs true thetas:
print(data.frame(true = theta_true[1:6], post_mean = colMeans(hier_res$thetas)[1:6], y = y_j[1:6], n = n_j[1:6]))
## true post_mean y n
## 1 0.2311690 0.2905453 17 60
## 2 0.2891404 0.3434913 10 29
## 3 0.7014177 0.6418564 30 42
## 4 0.3474979 0.3815572 18 46
## 5 0.3595317 0.3746800 30 79
## 6 0.4312335 0.4566386 34 72
# ===================================================================
# PART C — MCMC diagnostics
# - Traceplots
# - Autocorrelation (ACF)
# - Effective Sample Size (ESS)
# Use coda where convenient
# ===================================================================
# ---- 1) Normal Gibbs diagnostics ----
mu_mcmc <- mcmc(gibbs_res$raw_mu) # include raw chain to inspect mixing & burn visually
sigma2_mcmc <- mcmc(gibbs_res$raw_sigma2)
# Traceplots
par(mfrow = c(2,1))
# traceplot() (coda) respects par(mfrow); plot.mcmc would override the layout
traceplot(mu_mcmc, main = "Traceplot: mu (Normal Gibbs)", ylab = "mu")
traceplot(sigma2_mcmc, main = "Traceplot: sigma2 (Normal Gibbs)", ylab = "sigma2")
# ACF
par(mfrow = c(2,1))
acf(as.numeric(gibbs_res$raw_mu), main = "ACF: mu (Normal Gibbs)")
acf(as.numeric(gibbs_res$raw_sigma2), main = "ACF: sigma2 (Normal Gibbs)")
# Effective sample size
ess_mu <- effectiveSize(gibbs_res$mu)
ess_sigma2 <- effectiveSize(gibbs_res$sigma2)
cat("\nNormal Gibbs ESS: mu =", round(ess_mu,2), ", sigma2 =", round(ess_sigma2,2), "\n")
##
## Normal Gibbs ESS: mu = 6000 , sigma2 = 6000
# ---- 2) Hierarchical model diagnostics (alpha, beta, and a few thetas) ----
alpha_mcmc <- mcmc(hier_res$raw$alpha_raw)
beta_mcmc <- mcmc(hier_res$raw$beta_raw)
theta1_mcmc <- mcmc(hier_res$raw$thetas_raw[,1])
theta2_mcmc <- mcmc(hier_res$raw$thetas_raw[,2])
# Traceplots (alpha & beta)
par(mfrow = c(2,1))
traceplot(alpha_mcmc, main="Traceplot: alpha (hierarchical raw)", ylab="alpha")
traceplot(beta_mcmc, main="Traceplot: beta (hierarchical raw)", ylab="beta")
# Traceplot for a couple of thetas
par(mfrow = c(2,1))
traceplot(theta1_mcmc, main="Traceplot: theta[1] (hierarchical raw)", ylab="theta1")
traceplot(theta2_mcmc, main="Traceplot: theta[2] (hierarchical raw)", ylab="theta2")
# ACF plots
par(mfrow = c(3,1))
acf(alpha_mcmc, main="ACF: alpha")
acf(beta_mcmc, main="ACF: beta")
acf(theta1_mcmc, main="ACF: theta[1]")
# Effective sample sizes
ess_alpha <- effectiveSize(hier_res$alpha)
ess_beta <- effectiveSize(hier_res$beta)
ess_theta1 <- effectiveSize(hier_res$thetas[,1])
cat("\nHierarchical model ESS:\n")
##
## Hierarchical model ESS:
cat(" alpha:", round(ess_alpha,2), " beta:", round(ess_beta,2),
" theta[1]:", round(ess_theta1,2), "\n\n")
## alpha: 47.68 beta: 52.75 theta[1]: 8221.41
# ---- 3) Quick diagnostics summary plots using ggplot (posterior densities) ----
# Convert to data frames for ggplot density overlays
df_norm <- data.frame(mu = gibbs_res$mu, sigma2 = gibbs_res$sigma2)
p1 <- ggplot(df_norm, aes(x = mu)) + geom_density() + ggtitle("Posterior density: mu (Normal Gibbs)")
p2 <- ggplot(df_norm, aes(x = sigma2)) + geom_density() + ggtitle("Posterior density: sigma2 (Normal Gibbs)")
print(p1); print(p2)
df_hier_ab <- data.frame(alpha = hier_res$alpha, beta = hier_res$beta)
p3 <- ggplot(df_hier_ab, aes(x = alpha)) + geom_density() + ggtitle("Posterior density: alpha (hierarchical)")
p4 <- ggplot(df_hier_ab, aes(x = beta)) + geom_density() + ggtitle("Posterior density: beta (hierarchical)")
print(p3); print(p4)
# ===================================================================
# Optional: Gelman-Rubin (R-hat) requires multiple chains.
# For quick check, run 2 short independent chains for alpha & beta and compute R-hat.
# ===================================================================
run_two_chains <- function() {
res1 <- mh_within_gibbs_beta_binomial(y_j, n_j, niter = 6000, burn = 1500,
prop_sd_log_alpha = 0.12, prop_sd_log_beta = 0.12,
init_alpha = 0.8, init_beta = 0.8)
res2 <- mh_within_gibbs_beta_binomial(y_j, n_j, niter = 6000, burn = 1500,
prop_sd_log_alpha = 0.12, prop_sd_log_beta = 0.12,
init_alpha = 3.0, init_beta = 6.0)
# form mcmc.list
m1 <- mcmc(res1$alpha)
m2 <- mcmc(res2$alpha)
mlist <- mcmc.list(m1, m2)
gr_alpha <- gelman.diag(mlist, autoburnin = FALSE)
return(gr_alpha)
}
# (Run if you want R-hat check; can be somewhat slow)
# gr_alpha <- run_two_chains()
# print(gr_alpha)
# Final note to user
cat("\nFinished. You now have:\n - Gibbs sampler for Normal model (closed form).\n - Hierarchical Beta-Binomial sampler (Gibbs for theta_j + MH for alpha/beta).\n - Traceplots, ACFs, and ESS computed for main parameters.\n\n")
##
## Finished. You now have:
## - Gibbs sampler for Normal model (closed form).
## - Hierarchical Beta-Binomial sampler (Gibbs for theta_j + MH for alpha/beta).
## - Traceplots, ACFs, and ESS computed for main parameters.