Though a variety of R packages provide good performance of Bayesian Linear Regression, I feel a strong urge to implement it by plain code from scatch. This is an example of reproduction bayes.regress() from BayesSummaryStatLM package. One should fully understand how the math behand works after the practice.
Conditional posterior distributions regarding \(\beta\) prior
beta.prior=list(type = “flat”). A Uniform prior distribution for \(\beta\).
beta.prior=list(type = “mvnorm.known”). Multivariate Normal with known mean vector \(\mu\) and known covariance matrix \(C\).
beta.prior=list(type = “mvnorm.unknown”). Multivariate Normal with unknown mean vector μ and unknown covariance matrix C. This prior also includes the hyperpriors for \(\mu\) and \(C\), where \(\mu \sim Multivariate Normal (\eta, D)\), and \(C^{-1} \sim Wishart(d.f. = \lambda, scale matrix = V)\); \(\eta, D, \lambda, V\) assumed known.
\(\beta | \sigma^2, \mu, C^{-1}, X, Y \sim Normal_{k+1}(mean=(C^{-1}+\sigma^{-2}(X'X))^{-1}(C^{-1}\mu +\sigma^{-2}X'Y),covariance=(C^{-1}+\sigma^{-2}(X'X)^{-1}))\)
\(\mu | \beta, \sigma^2, C^{-1}, X, Y \sim Normal_{k+1} (mean=(D^{-1}+C^{-1})^{-1}(C^{-1}\beta+D^{-1}\eta), covariance=(D^{-1}+C^{-1})^{-1})\)
\(C^{-1} | \beta, \sigma^2, \mu, X, Y \sim Wishart_{k+1} (d.f. = (1+\lambda), scale matrix = (V^{-1}+ (\beta - \mu)(\beta - \mu)')^{-1})\)
Conditional posterior distributions regarding \(σ^2\) prior
sigmasq.prior=list(type = “inverse.gamma”). Inverse Gamma distribution with known shape and scale parameters \(a\) and \(b\), respectively.
sigmasq.prior=list(type = “sigmasq.inverse”). Inverse sigma-squared distribution.
set.seed(1234)
num.samp <- 100 # number of data values to simulate
# The first value of the beta vector is the y-intercept:
beta <- c(-0.35, 0.88, -0.46, 0.43, -1.71)
# Calculate the number of predictor variables:
num.pred <- length(beta)-1
rho <- 0.0 # correlation between predictors
mean.vec <- rep(0,num.pred)
sigma.mat <- matrix(rho,num.pred,num.pred) + diag(1-rho,num.pred)
sigmasq.sim <- 0.05
# Simulate predictor variables:
# library(MASS)
x.pre <- mvrnorm(num.samp, mu=mean.vec, Sigma=sigma.mat)
# Add leading column of 1's to x.pre for y-intercept:
x <- cbind(rep(1,num.samp),x.pre)
epsilon <- rnorm(num.samp, mean=0, sd=sqrt(sigmasq.sim))
y <- as.numeric( x %*% as.matrix(beta) + epsilon)
# See the data
plot(y)
for(i in 2:5){
points(x[,i],col=i)
}
# Set the number of MCMC samples
Tsamp.out <- 100
Tsamp.out <- Tsamp.out+1
## Compute summary statistics
xtx <- t(x)%*%x
xty <- t(x)%*%y
yty <- t(y)%*%y
ytx<-t(xty)
xtx.inv = chol2inv(chol(xtx)) # compute (X'X)^(-1)
# num.predictors <- dim(xtx)[1]
betahat <- matrix(NA, nrow=Tsamp.out, ncol = dim(xtx)[1]) # betahat = 101 \times 5
sigmasqhat <- rep(NA,Tsamp.out) # sigmasqhat = 101
Start
# set starting value for sigmasqhat
sigmasq.init = 0.1;
sigmasqhat[1] <- sigmasq.init
# posterior mean of betahat
betahat.mean <- xtx.inv%*%(xty)
Posterior variance of betahat \(\hat{\beta}\)
# posterior variance of betahat
for (i in 2:Tsamp.out){
# simulate betahat
betahat[i,] <- mvrnorm(n=1, mu=betahat.mean, Sigma=sigmasqhat[i-1]*xtx.inv)
# simulate sigmasqhat
sigmasqscale.pre <- (yty - t(betahat[i,]) %*% xty -
ytx %*% betahat[i,] + t(betahat[i,]) %*% xtx %*% betahat[i,])
sigmasqhat[i] <- 1/rgamma(1,shape=num.samp/2,scale=(sigmasqscale.pre/2)^(-1))
} # end i
Remove starting value for sigmasqhat and NA for starting value of betahat
betahat <- betahat[-1,]
sigmasqhat <- sigmasqhat[-1]
# Compare the true value and the simulated one
paste("beta =", paste(beta,collapse = " "))
[1] "beta = -0.35 0.88 -0.46 0.43 -1.71"
paste("beta.mean =", paste(round(betahat.mean,2),collapse = " "))
[1] "beta.mean = -0.36 0.86 -0.45 0.43 -1.73"
paste("betahat =", paste(round(betahat[num.samp,],2),collapse = " "))
[1] "betahat = -0.35 0.87 -0.41 0.46 -1.76"
Start
# num.predictors <- dim(xtx)[1]
betahat <- matrix(NA, nrow=Tsamp.out, ncol = dim(xtx)[1]) # betahat = 101 \times 5
sigmasqhat <- rep(NA,Tsamp.out) # sigmasqhat = 101
# set starting value for sigmasqhat
sigmasq.init = 1.0;
beta.prior.mean=rep(0,dim(xtx)[1])
beta.prior.var=diag(dim(xtx)[1])
beta.prior.var.inv=chol2inv(chol(beta.prior.var))
sigmasqhat[1] <- sigmasq.init
betahat.pre1 <- beta.prior.var.inv # C^{-1}
betahat.pre2 <- betahat.pre1 %*% beta.prior.mean # C^{-1} %*% μ
Posterior variance of betahat \(\hat\beta\)
for (i in 2:Tsamp.out){
betahat.var <- chol2inv(chol((betahat.pre1+(1/sigmasqhat[i-1]) * xtx))) # covariance=(C^{-1}+σ^{-2}(X'X)^{-1})
betahat.mean <- betahat.var %*% (betahat.pre2 + (1/sigmasqhat[i-1]) * xty) # mean=(C^{-1}+σ^{-2}(X'X))^{-1}(C^{-1}μ + σ^{-2}X'Y)
betahat[i,] <- mvrnorm(n=1, mu = betahat.mean,Sigma = betahat.var)
# simulate sigmasqhat
sigmasqscale.pre <- (yty - t(betahat[i,]) %*% xty -
ytx %*% betahat[i,] + t(betahat[i,]) %*% xtx %*% betahat[i,]) # (Y'Y-β'X'Y-Y'Xβ+β'X'Xβ)
sigmasqhat[i] <- 1/rgamma(1,shape=num.samp/2,scale=(sigmasqscale.pre/2)^(-1)) # Inverse Gamma (numsamp.data/2, (0.5(Y'Y-β'X'Y-Y'Xβ+β'X'Xβ))^{-1})
} # end
Remove starting value for sigmasqhat and NA for starting value of betahat
betahat <- betahat[-1,]
sigmasqhat <- sigmasqhat[-1]
# Compare the true value and the simulated one
paste("beta =", paste(beta,collapse = " "))
[1] "beta = -0.35 0.88 -0.46 0.43 -1.71"
paste("beta.mean =", paste(round(betahat.mean,2),collapse = " "))
[1] "beta.mean = -0.36 0.86 -0.45 0.43 -1.72"
paste("betahat =", paste(round(betahat[num.samp,],2),collapse = " "))
[1] "betahat = -0.41 0.89 -0.44 0.43 -1.76"